Return to MUSA 801 Projects Page

Introduction

This project was completed as part of the Master in Urban Spatial Analytics Spring 2021 Practicum instructed by Ken Steif, Michael Fichman, and Matt Harris. We thank our instructors as well as Dexter Locke and Lara Roman from the United States Forest Service for their help navigating the data and for their feedback.

This document is intended to help other researchers replicate a study of tree canopy loss linked to spatial risk factors. We present three primary outcomes: an analysis of the spatial attributes of tree canopy loss, a model to predict future tree canopy loss risk, and a policy tool which visualizes future tree canopy loss under various construction scenarios. We include hyperlinks thorough our document and in the appendix for reproducibility.

Our hope is that this document and our policy tool can help tree planting agencies, tree advocates, and the interested public understand the forces contributing to tree canopy loss in Philadelphia and envision different future scenarios for our tree canopy.

#Reload
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(scipen=10000000)
library(knitr)
library(MLeval)
library(magrittr)
library(grid)
library(RSocrata)
library(ggcorrplot)
library(randomForest)
library(dplyr)
library(caret)
library(tidyverse)
library(tidycensus)
library(sf)
library(kableExtra)
library(viridis)
library(mapview)
library(lubridate)
library(gridExtra)
library(ggplot2)
library(ggmap)
library(jsonlite)
library(entropy)
library(tidyr)
library(FNN) 
library(pROC)
library(plotROC)
library(RANN)


root.dir = "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")

paletteGray <- c("gray90", "gray70", "gray50", "gray30", "gray10")

palette1 <- c("#d2f2d4","#7be382","#26cc00","#22b600","#009c1a")

palette2 <- c("#ffffff","#c7ffd5","#81d4ac","#329D9C","#205072", "#0B0138")

palette3 <- c("#2C5F2D","#97BC62FF")

palette4 <- c("#007F5F", '#EEEF20', '#AACC00')

paletteHolc <- c("light green", "light blue", "yellow", "pink")

palette5 <- c("#205072", "#329D9C", "#56C596", "#7BE495", "#CFF4D2", "#05001c")

palette6 <- c("#D8F3DC", "#B7E4C7", "#95D5B2", "#74C69D", "#52B788", "#40916C", "#2D6A4F", "#1B4332", "#123024", "#081C15")

GoalPalette <- c("light blue", "#D8F3DC",  "#95D5B2",  "#52B788",  "#2D6A4F", "#1B4332")
GainPalette <- c("#D8F3DC",  "#95D5B2",  "#52B788",  "#2D6A4F","light blue")

palette7 <- c("#D8F3DC", "#95D5B2", "#52B788", "#2D6A4F", "#1B4332", "#123024")

GoalPalette2 <- c("#95D5B2", "#52B788", "#2D6A4F", "#1B4332", "#123024", "blue")

palette8 <- c("#2D6A4F", "#52B788", "#B7E4C7", "#ADE8F4", "#00B4D8", "#023E8A")

paletteJ <- c("green", "red", "yellow")

paletteLG <- c("#1B4332","#2D6A4F", "#52B788","#D8F3DC", "blue")

landusepal <- c("green", "dark green" )

paletteDiverge <- c("red", "orange", "yellow", "blue", "green")

mapTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 14,colour = "black"),
    plot.subtitle=element_text(face="italic"),
    plot.caption=element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(), axis.title = element_blank(),
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "white", fill=NA, size=2)
  )
}


plotTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 14,colour = "black"),
    plot.subtitle = element_text(face="italic"),
    plot.caption = element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_line("grey80", size = 0.1),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "white", fill=NA, size=2),
    strip.background = element_rect(fill = "grey80", color = "white"),
    strip.text = element_text(size=12),
    axis.title = element_text(size=12),
    axis.text = element_text(size=10),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.title = element_text(colour = "black", face = "italic"),
    legend.text = element_text(colour = "black", face = "italic"),
    strip.text.x = element_text(size = 14)
  )
}


#for shared legends
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}
# Tree Canopy
TreeCanopy <-
 #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/TreeUse/TreeCanopyChange_2008_2018.shp")%>%
st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/TreeCanopyChange_2008_2018-shp/TreeCanopyChange_2008_2018.shp") %>%
# st_read("C:/Users/agarw/Documents/MUSA 801/Data/TreeCanopyChange_2008_2018.shp") %>%
#st_read("C:/Users/Prince/Desktop/Tree Canopy Data/TreeCanopyChange_2008_2018.shp") %>%
  st_transform('ESRI:102728') 

TreeCanopy$SHAPE_Area <- as.numeric(st_area(TreeCanopy))

Loss <- TreeCanopy %>%
  filter(CLASS_NAME == "Loss")

# Philadelphia Base Map
Philadelphia <- st_read("http://data.phl.opendata.arcgis.com/datasets/405ec3da942d4e20869d4e1449a2be48_0.geojson")%>%
  st_transform('ESRI:102728') %>%
  st_make_valid()

# Neighborhoods
Neighborhood <-
 #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/Neighborhoods/Neighborhoods_Philadelphia.shp") %>%
st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.shp")%>%
 # st_read("C:/Users/agarw/Documents/MUSA 801/Data/Neighborhoods_Philadelphia.shp") %>%
#st_read("C:/Users/Prince/Desktop/Tree Canopy Data/Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.shp") %>%
  st_transform('ESRI:102728')

Neighborhood <-
  Neighborhood%>%
  mutate(NArea = st_area(Neighborhood))%>%
  mutate(NArea = as.numeric(NArea))

#make Philadelphia basemap
ll <- function(dat, proj4 = 4326){
  st_transform(dat, proj4)}


base_map <- get_stamenmap(c(left = -75.34937, bottom = 39.84524, right = -74.92109, top = 40.17457),
                           maptype = "terrain-lines")

 RMBound <- Neighborhood %>%  
   dplyr::filter(NAME=="RICHMOND")
 Richmond <- st_intersection(st_make_valid(TreeCanopy), RMBound)
 RMbase_map <- get_map(location = unname(st_bbox(ll(st_buffer(st_centroid(RMBound),10000)))),
                     maptype = "satellite")
 
  URBound <- Neighborhood %>%  
   dplyr::filter(NAME=="UPPER_ROXBOROUGH")
 UpperRoxborough <- st_intersection(st_make_valid(TreeCanopy), URBound)
 URbase_map <- get_map(location = unname(st_bbox(ll(st_buffer(st_centroid(URBound),10000)))),
                     maptype = "satellite")


HOLC <- 
  st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/PAPhiladelphia1937.geojson") %>%
# st_read("C:/Users/Prince/Desktop/Tree Canopy Data/PAPhiladelphia1937.geojson") %>%
  #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/PAPhiladelphia1937.geojson")%>%
# st_read("C:/Users/agarw/Documents/MUSA 801/Data/PAPhiladelphia1937.geojson")%>%
  st_transform('ESRI:102728')

HOLC <-
  HOLC%>%
  mutate(HOLCArea = st_area(HOLC))%>%
  mutate(HOLCArea = as.numeric(HOLCArea)) %>%
  dplyr::select(holc_grade)


# ACS

census_api_key("d9ebfd04caa0138647fbacd94c657cdecbf705e9", install = FALSE, overwrite = TRUE)

# Variables: Median Rent, Median HH Income, Population, Bachelor's, No Vehicle (home owner, renter), Households (owner, renter-occupied), total housing units, white

ACS <-  

  get_acs(geography = "tract", variables = c("B25058_001E", "B19013_001E", "B01003_001E", "B06009_005E", "B25044_003E", "B25044_010E", "B07013_002E", "B07013_003E", "B25001_001E", "B01001A_001E", "B04004_051E"), 
          year=2018, state=42, county=101, geometry=T) %>% 
  st_transform('ESRI:102728')


#Change to wide form
ACS <- 
  ACS %>%
  dplyr::select( -NAME, -moe) %>%
  spread(variable, estimate) %>%
  dplyr::select(-geometry) %>%
  rename(Rent = B25058_001, 
         medHHInc = B19013_001,
         population = B01003_001, 
         bachelor = B06009_005,
         noVehicle_hmow = B25044_003, 
         noVehicle_hmre = B25044_010,
         Households_hmow = B07013_002,
         Households_hmre = B07013_003,
         housing_units = B25001_001,
         white = B01001A_001, 
         italian = B04004_051)
st_drop_geometry(ACS)[1:3,]


ACS <- 
  ACS %>%
  mutate(pctBach = ifelse(population > 0, bachelor / population, 0),
         pctWhite = ifelse(population > 0, white / population, 0),
         pctNoVehicle = ifelse(Households_hmow + Households_hmre > 0, 
                               (noVehicle_hmow + noVehicle_hmre) / 
                                  (Households_hmow + Households_hmre),0),
         year = "2018") %>%
  dplyr::select(-Households_hmow,-Households_hmre,-noVehicle_hmow,-noVehicle_hmre,-bachelor, -white)


parcels <- st_read("http://data-phl.opendata.arcgis.com/datasets/1c57dd1b3ff84449a4b0e3fb29d3cafd_0.geojson")%>%
 st_transform('ESRI:102728')

const_permits <- 
 #st_read("C:/Users/agarw/Documents/MUSA 801/Data/const_permit.csv") %>%
 # st_read("C:/Users/Prince/Desktop/Tree Canopy Data/const_permit.csv") %>%
#  st_read("C:/Users/Kyle McCarthy/Documents/Practicum/permits.csv")%>%
  st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/const_permit.csv")%>%
  mutate(X = geocode_x,
         Y = geocode_y) 
const_permits[const_permits==""]<-NA
const <- const_permits %>% drop_na(X,Y)
const_spatial <- st_as_sf(const, coords = c("X","Y"), crs = 6565, agr = "constant")
const_spatial <- const_spatial %>% st_transform('ESRI:102728')
# const <- const_spatial %>%
#   filter(permitissuedate <= '30/06/2017 00:00')

const_spatial$permit_issue <- mdy_hm(const$permitissuedate)
const_spatial$year <- year(const_spatial$permit_issue)

const <- const_spatial %>%
   filter(year <= "2017")


LandUseFishnet <- 
  #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/Fishnet_LU_Intersect/fishnet_LUS_Intersect_dissolve.shp")  %>%
 #  st_read("C:/Users/agarw/Documents/MUSA 801/Data/fishnet_LUS_Intersect_dissolve.geojson") %>%
 st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/fishnet_LUS_Intersect_dissolve.geojson") %>%
#  st_read("C:/Users/Prince/Desktop/Tree Canopy Data/fishnet_LUS_Intersect_dissolve.geojson") %>%
   st_transform('ESRI:102728')


Hydrology <-
  st_read("https://opendata.arcgis.com/datasets/a31f7d7469404e919517e038fc133a8e_0.geojson")%>%
    st_transform('ESRI:102728')
  
Hydrology <-
  st_intersection(st_make_valid(Philadelphia), st_make_valid(Hydrology))

 
 Parks <- 
   st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/Parks") %>%
   #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/Parks/Philadelphia_PPR_Park_Boundaries2016.shp ")%>%
 # st_read("C:/Users/agarw/Documents/MUSA 801/Data/Philadelphia_PPR_Park_Boundaries2016.shp") %>%
   #st_read("C:/Users/Prince/Desktop/Tree Canopy Data/Philadelphia_PPR_Park_Boundaries2016.shp") %>%
     st_transform('ESRI:102728') %>%
   st_make_valid() %>%
   dplyr::select(geometry)

Conservation <- 
   st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/Conservation/ConservationEasements202103.shp") %>%
  #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/Conservation/ConservationEasements202103.shp")%>%
  # st_read("C:/Users/agarw/Documents/MUSA 801/Data/ConservationEasements202103.shp") %>%
   #  st_read("C:/Users/Prince/Desktop/Tree Canopy Data/ConservationEasements202103.shp") %>%
   st_transform('ESRI:102728') %>%
  st_intersection(st_make_valid(Philadelphia), st_make_valid(Conservation)) %>%
  st_make_valid() %>%
   dplyr::select(geometry)

ConservationNParks <- rbind(Parks, Conservation)

Motivation

Between 2008 and 2018, Philadelphia experienced a net loss of more than 1000 football fields’ worth of tree canopy.

Most of this loss occurred in historically disenfranchised communities. In response, the City of Philadelphia has set important milestones for conserving and increasing the current tree canopy in the city. In this analysis, we model tree canopy loss in Philadelphia and identify risk factors. We additionally assess the city’s progress on the goals and how this varies across neighborhoods.

ggmap(base_map) +
  geom_sf(data = ll(Philadelphia), colour = "black", fill = "gray", alpha = 0.5, inherit.aes = FALSE) +
  labs(title = "Study Area Boundaries",
       subtitle = "Philadelphia, PA") +
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

Why does the tree canopy matter?

Tree canopy is defined as the area of land which, viewed from a bird’s eye view, is covered by trees. Trees offer important benefits to cities including mitigating the urban heat island effect, absorbing stormwater runoff, a habitat for wildlife, and aesthetic and recreational benefits. In addition, trees are a preventative public health measure which improves mental health, increases social interactions and activity, and reduces crime, violence, and stress. A 2020 study published in The Lancet found that if Philadelphia reaches 30% tree canopy in all of its neighborhoods, the city would see 403 fewer premature deaths per year. However, despite the importance of trees to the city’s health, appearance, and ecology, trees are regularly removed for reasons ranging from construction to homeowners’ personal preference.

Cherry blossom trees in Philadelphia’s Fairmount Park

Philadelphia’s Tree Canopy Goals

30% Tree Canopy in each neighborhood by 2025

Philadelphia under a previous mayoral administration set the goal of achieving 30% tree canopy coverage in all neighborhoods by 2025. Currently, the citywide average is 20%, although this is highly uneven. More affluent neighborhoods in the northwest that have many parks and large lawns have much higher tree canopy than industrial neighborhoods in south and north Philadelphia which are full of impervious surfaces. The neighborhoods which have lower canopy coverage tend to be historically disenfranchised, lower income, and predominantly communities of color. This is an environmental justice problem. Neighborhoods like Philadelphia’s Hunting Park have a much lower ratio of tree canopy to impervious surfaces than the citywide average, resulting in higher temperatures during summertime heat waves and poorer air quality.

1 acre of tree canopy within a 10 minute walk for all residents

13% of Philadelphia’s residents are considered under-served by green space, meaning that they live more than a 10 minute walk away from at least 1 acre of green space. The city has therefore set a goal of ensuring that all residents are no more than 10 minutes away from at least 1 acre of green space by 2035. To this end, the city is prioritizing adding green space in historically disenfranchised neighborhoods and making efforts to green spaces in schools and recreation centers.

Planning Tool: Canopy View

Using this information, we created a predictive scenario planning policy tool which displays future tree canopy loss under three construction scenarios. The general population and tree planting agencies can use our tool to understand the potential impact of construction on the tree canopy and to prioritize tree planting efforts.

Canopy View web application

Data and Methods

In this analysis, we use the following data:

  • Independent variable:

  • Tree Canopy LiDAR data consisting of canopy polygons marked as loss, gain, or no change from 2008-2018 OpenDataPhilly

  • Neighborhood social attributes:

  • 2018 American Community Survey data from the U.S. Census Bureau

  • Neighborhoood level risk factors:

  • Construction

  • Parcels

  • Streets

  • Tree-related 311 requests

  • Normalized difference vegetation index (NDVI)

  • Hydrology

  • Zoning

  • Health outcomes

Unit of analysis: the fishnet cell

Below, we visualize the study area for this analysis. We use fishnet grid cells as our unit of analysis to take advantage of high resolution spatial data such as 311 service requests. Below, Philadelphia is visualized as fishnet cells of 1615 feet, roughly the size of a city block. Roughly ____ % of Philadelphia’s total area consists of conservation easement, hydrology, and parks. We omit fishnet cells which consist of more than 20% of these categories to isolate land that may be protected or experience different tree canopy change processes. All in all, we have 1411 fishnet cells representing Philadelphia.

#make fishnet
fishnet2 <- 
  st_make_grid(Philadelphia,
               cellsize = 1615) %>%
  st_sf() %>%
  mutate(uniqueID = rownames(.))%>%
  st_transform('ESRI:102728')

FishnetIntersect <- st_intersection(fishnet2, ConservationNParks )%>% 
  mutate(Area = as.numeric(st_area(.)))%>%
  st_drop_geometry()%>%
  group_by(uniqueID)%>%
  summarise(ConservationArea = sum(Area))%>% 
  left_join(fishnet2, .)%>%
  mutate_all(funs(replace_na(.,0)))%>% 
  mutate(ConservationPct = ConservationArea /2608225 * 100) %>%
  filter(ConservationPct <= 20)

ggmap(base_map) +
  geom_sf(data = ll(ConservationNParks), fill = "magenta", colour = "transparent", inherit.aes = FALSE) +
  geom_sf(data = ll(FishnetIntersect), fill = "transparent", colour = "black", inherit.aes = FALSE) +
  labs(title = "Study Area Fishnet", 
       subtitle = "Philadelphia, PA") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

fishnet <- FishnetIntersect

Exploratory Analysis

Our analysis in the following sections addresses the following questions:
  1. What does Philadelphia’s tree canopy look like today?
  2. How close are neighborhoods to achieving the 30% goal?
  3. How does tree canopy change vary by demographic?
  4. How do current patterns of tree canopy coverage and loss reflect disinvestment as a result of redlining and older planning practices?
  5. What other factors are associated with the tree canopy?

1. What does Philadelphia’s tree canopy look like today?

Philadelphia’s 2018 Canopy

First, we aggregate our tree canopy data to the fishnet so that we have gain, loss, and total canopy area for each cell. Northwest, West, and Northeast Philadelphia have the most tree canopy. In North and South Philadelphia as well as Center City, tree canopy is more sparse.

# Make canopy loss layers
TreeCanopyAll<- 
  TreeCanopy%>%
  st_make_valid()%>%
  st_intersection(fishnet)

TreeCanopyAll<- 
  TreeCanopyAll%>%
  mutate(TreeArea = st_area(TreeCanopyAll))%>%
  mutate(TreeArea = as.numeric(TreeArea))

# Tree Canopy Loss

TreeCanopyLoss <- 
  TreeCanopyAll%>%
  filter(CLASS_NAME == "Loss") %>%
  group_by(uniqueID)%>%
  summarise(AreaLoss = sum(TreeArea))%>%
  mutate(pctLoss = AreaLoss / 2608225)%>%
  st_drop_geometry()
  
TreeCanopyLoss<- 
  fishnet%>%
  left_join(TreeCanopyLoss, by = 'uniqueID')%>%
  dplyr::select('uniqueID', 'AreaLoss')%>%
  st_drop_geometry()


# Tree Canopy Gain 

TreeCanopyGain <- 
  TreeCanopyAll%>%
  filter(CLASS_NAME == "Gain")%>%
  group_by(uniqueID)%>%
  summarise(AreaGain = sum(TreeArea))%>%
  mutate(pctGain = AreaGain / 2608225)%>%
  st_drop_geometry()%>%
  dplyr::select(uniqueID, pctGain, AreaGain)

TreeCanopyGain<- 
  fishnet%>%
  left_join(TreeCanopyGain, by = 'uniqueID')%>%
  dplyr::select('uniqueID', 'AreaGain')%>%
  st_drop_geometry()

# Tree Canopy Coverage

TreeCanopyCoverage18 <- 
  TreeCanopyAll%>%
  filter(CLASS_NAME != "Loss")%>%
  group_by(uniqueID)%>%
  summarise(AreaCoverage18 = sum(TreeArea))%>%
  mutate(pctCoverage18 = (AreaCoverage18 / 2608225) * 100) %>%
  st_drop_geometry()

TreeCanopyCoverage18<- 
  fishnet%>%
  left_join(TreeCanopyCoverage18)%>%
  dplyr::select('uniqueID', 'AreaCoverage18', 'pctCoverage18')%>%
  st_drop_geometry()

TreeCanopyCoverage08 <- 
  TreeCanopyAll%>%
  filter(CLASS_NAME != "Gain")%>%
  group_by(uniqueID)%>%
  summarise(AreaCoverage08 = sum(TreeArea))%>%
  mutate(pctCoverage08 = (AreaCoverage08 / 2608225) * 100)%>%
  st_drop_geometry()

TreeCanopyCoverage08<- 
  fishnet%>%
  left_join(TreeCanopyCoverage08)%>% 
  dplyr::select('uniqueID', 'AreaCoverage08', 'pctCoverage08')%>%
  st_drop_geometry()


FinalFishnet <- 
  left_join(TreeCanopyLoss, TreeCanopyGain)%>%
  left_join(TreeCanopyCoverage18)%>%
  left_join(TreeCanopyCoverage08)%>%
  mutate_all(~replace(., is.na(.), 0))%>%
  mutate(pctLoss = round(AreaLoss/AreaCoverage08 * 100, 2), 
         pctGain = round(AreaGain/AreaCoverage18 * 100, 2))%>%
  mutate(pctChange = ifelse(AreaCoverage08 > 0, ((AreaCoverage18 - AreaCoverage08) / (AreaCoverage08) * 100), 0)) %>%
  mutate(netChange = AreaGain - AreaLoss)%>%
  dplyr::select(pctLoss, pctGain, pctChange,
                AreaGain, AreaLoss, AreaCoverage08, AreaCoverage18, pctCoverage08, pctCoverage18, uniqueID, pctChange, netChange)

FinalFishnet<- 
  fishnet%>%
  left_join(FinalFishnet)

FinalFishnet<- 
  FinalFishnet%>% 
  mutate(pctChange = (AreaCoverage18 - AreaCoverage08) / (AreaCoverage08) * 100)%>%
  mutate(netChange = AreaGain - AreaLoss, 
         LogPctLoss = log10(pctLoss), 
         LogPctGain = log10(pctGain))
  


FinalFishnet$pctCoverage08Cat <- cut(FinalFishnet$pctCoverage08, 
                      breaks = c(-Inf,  10,  20, 30, 50, Inf), 
                       labels = c("Very Little Tree Canopy", "Some Tree Canopy", "Moderate Tree Canopy", "High Tree Canopy (Met Goal)", "Extremely High Tree Canopy (Met Goal)"), 
                       right = FALSE)

FinalFishnet$pctCoverage18Cat <- cut(FinalFishnet$pctCoverage18, 
                      breaks = c(-Inf,  10,  20, 30, 50, Inf), 
                       labels = c("Very Little Tree Canopy", "Some Tree Canopy", "Moderate Tree Canopy", "High Tree Canopy (Met Goal)", "Extremely High Tree Canopy (Met Goal)"), 
                       right = FALSE)

FinalFishnet$AreaLossCat <- cut(FinalFishnet$AreaLoss, 
                       breaks = c(-Inf, 50000, 100000, 200000, 400000, 800000, Inf), 
                       labels = c("Less Than 50,000", "50,000 - 100,000", "100,000-200,000", "200,000 - 400,000", "400,000 - 800,000", "> 800,000"), 
                       right = FALSE)

FinalFishnet$AreaGainCat <- cut(FinalFishnet$AreaGain, 
                       breaks = c(-Inf, 50000, 100000, 200000, 400000, 800000, Inf), 
                       labels = c("Less Than 50,000", "50,000 - 100,000", "100,000-200,000", "200,000 - 400,000", "400,000 - 800,000", "> 800,000"), 
                       right = FALSE)

FinalFishnet$pctGainCat <- cut(FinalFishnet$pctGain, 
                      breaks = c(-Inf,20, 40, 60, 80, Inf), 
                       labels = c("0%-20% Gain", "20%-40% Gain", "40%-60% Gain", "60%-80% Gain", "80%-100% Gain"), 
                       right = FALSE)

FinalFishnet$pctLossCat <- cut(FinalFishnet$pctLoss, 
                       breaks = c(-Inf,20, 40, 60, 80, Inf), 
                       labels = c("0%-20% Loss", "20%-40% Loss", "40%-60% Loss", "60%-80% Loss", "80%-100% Loss"), 
                       right = FALSE)
ggmap(base_map) +
  geom_sf(data = ll(FinalFishnet), colour = "transparent", inherit.aes = FALSE, aes(fill = pctCoverage18Cat))+ 
     scale_fill_manual(values = palette7, 
                    name = "2018 Percent Coverage")+ 
  labs(title = "Tree Canopy, 2018", 
subtitle = "Tree Canopy Area / Gridcell Area") + 
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

Tree Canopy Change

Surprisingly, tree canopy loss and gain exhibit similar spatial patterns. Both percent gain and loss are highest in a few neighborhoods in South Philadelphia and in the Northeast. While this seems counter-intuitive, this is because these neighborhoods have the least tree canopy, therefore each individual tree gained or lost has a greater overall impact.

v <- ggmap(base_map) +
  geom_sf(data = ll(FinalFishnet), colour = "transparent", aes(fill = pctLoss), inherit.aes = FALSE)+
 # scale_fill_manual(values = palette7,name = "Percent Loss")+
  labs(title= "Percent Tree Canopy Loss", 
       subtitle = "Tree Canopy Lost From 2008 - 2018 / Total Tree Canopy Coverage in 2008")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

Z <- ggmap(base_map) +
  geom_sf(data = ll(FinalFishnet),  colour = "transparent", aes(fill = pctGain), inherit.aes = FALSE)+ 
 # scale_fill_manual(values = palette7,name = "Percent Gain")+
  labs(title= "Percent Tree Canopy Gain", 
       subtitle = "Tree Canopy Gained From 2008 - 2018 / Total Tree Canopy Coverage in 2018")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

grid.arrange(ncol=2,
v + scale_fill_distiller(palette = "YlOrRd", direction = 1, name = "% Loss"),
Z + scale_fill_distiller(palette = "YlGr", direction = 1, name = "% Gain"))

FinalFishnet$PctChangeCat <- cut(FinalFishnet$pctChange, 
                       breaks = c(-Inf,  -30, -10, 0, 10, 20, Inf), 
                       labels = c("Significant Net Loss", "Moderate Net Loss", "Low Net Loss", "Low Net Gain", "Moderate Net Gain", "Significant Net Gain"), 
                       right = FALSE)

When we plot the log-transformed percent tree canopy and percent tree canopy gained/lost for each fishnet cell, we see a negative relationship, meaning that fishnet cells with more trees experience less percentage change.

gain <- ggplot(FinalFishnet %>% filter(pctCoverage18 > 0.01 & pctGain > 1), aes(x = pctGain, y = pctCoverage18))+
    stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
    geom_smooth(method = "lm", se = FALSE, colour = "black") +
 # geom_point(colour = "black", alpha = 0.3, size = 4)+ 
      scale_x_log10() + scale_y_log10() +
  labs(title = "Gain vs. 2018 Tree Canopy (%)", 
       subtitle = "Density of fishnet cells") + 
  xlab("% Canopy Gain 2008 - 2018") + 
  ylab("% Tree Canopy 2018") + 
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +
    plotTheme()

loss <- ggplot(FinalFishnet %>% filter(pctCoverage18 > 0.01 & pctGain > 1), aes(x = pctLoss, y = pctCoverage18))+
    stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
    geom_smooth(method = "lm", se = FALSE, colour = "black") +
 # geom_point(colour = "black", alpha = 0.3, size = 4)+ 
      scale_x_log10() + scale_y_log10() +
  labs(title = "Loss vs. 2018 Tree Canopy (%)", 
       subtitle = "Density of fishnet cells") + 
  xlab("% Canopy Loss 2008 - 2018") + 
  ylab("% Tree Canopy 2018") + 
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +
    plotTheme()

pctChangeGraph <- 
  FinalFishnet%>% 
  filter(pctChange < 100)

change <- ggplot(pctChangeGraph, aes(x = pctChange, y = pctCoverage18))+
  stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
    geom_smooth(method = "lm", se = FALSE, colour = "black") +
 # geom_point(colour = "black", alpha = 0.3, size = 4)+ 
  labs(title = "Change vs. 2018 Tree Canopy (%)") + 
  xlab("% Change in Tree Canopy from 2008-2018 ") + 
  ylab("2018 Tree Canopy") + 
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +
    plotTheme()


grid.arrange(gain, loss, change, top = "Tree Canopy Change and Existing Tree Canopy", ncol = 2)

Citywide, the majority of fishnet cells experienced low net loss, while a similar amount of cells experienced either substantial loss or gain.

b <- ggmap(base_map) +
  geom_sf(data = ll(FinalFishnet), inherit.aes = FALSE, colour = "transparent", aes(fill = PctChangeCat))+ 
  labs(title= "Tree Canopy Net Change 2008-2018", 
       subtitle = "Percent Tree Canopy Gain - Percent Tree Canopy Loss") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

b + scale_fill_brewer(palette = "PiYG", name = "Percent Change", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE))

fishnet_centroid <- FinalFishnet%>%
  st_centroid()

tractNames <- ACS %>%
  dplyr::select(GEOID) 

FinalFishnet1 <- fishnet_centroid %>%
  st_join(., tractNames) %>%
  st_drop_geometry() %>%
  left_join(FinalFishnet,.,by="uniqueID") %>%
  dplyr::select(GEOID) %>%
  mutate(uniqueID = as.numeric(rownames(.)))

# Make fishnet with ACS variables
ACS_net <-   
  FinalFishnet1 %>%
  st_drop_geometry() %>%
  group_by(uniqueID, GEOID) %>%
  summarize(count = n()) %>%
    full_join(FinalFishnet1) %>%
    st_sf() %>%
    na.omit() %>%
    ungroup() %>%
  st_centroid() %>% 
  dplyr::select(uniqueID) %>%
  st_join(., ACS) %>%
  st_drop_geometry() %>%
  left_join(.,FinalFishnet1) %>%
  st_as_sf()%>%
  st_drop_geometry()%>%
  mutate(uniqueID = as.character(uniqueID))

FinalFishnet <- 
  FinalFishnet%>%
  left_join(ACS_net)

2. How close are neighborhoods to the 30% tree canopy goal?

# Tree Loss by Neighborhood
TreeCanopyAllN<- 
  TreeCanopy%>%
  st_make_valid()%>%
  st_intersection(Neighborhood)

TreeCanopyAllN<- 
  TreeCanopyAllN%>%
  mutate(TreeArea = st_area(TreeCanopyAllN))%>%
  mutate(TreeArea = as.numeric(TreeArea))

TreeCanopyLossN <- 
  TreeCanopyAllN%>%
  filter(CLASS_NAME == "Loss")%>%
  group_by(NAME, NArea)%>%
  summarise(AreaLoss = sum(TreeArea))%>%
  mutate(pctLoss = AreaLoss / NArea)%>%
  mutate(pctLoss = as.numeric(pctLoss))%>%
  st_drop_geometry()

TreeCanopyLossN<- 
  Neighborhood%>%
  left_join(TreeCanopyLossN)


# Tree Gain 

TreeCanopyGainN <- 
  TreeCanopyAllN%>%
  filter(CLASS_NAME == "Gain")%>%
  group_by(NAME, NArea)%>%
  summarise(AreaGain = sum(TreeArea))%>%
  mutate(pctGain = AreaGain / NArea)%>%
  mutate(pctGain = as.numeric(pctGain))%>%
  st_drop_geometry()

TreeCanopyGainN<- 
  Neighborhood%>%
  left_join(TreeCanopyGainN)


TreeCanopyCoverageN18 <- 
  TreeCanopyAllN%>%
  filter(CLASS_NAME != "Loss")%>%
  group_by(NAME, NArea)%>%
  summarise(AreaCoverage = sum(TreeArea))%>%
  mutate(AreaCoverage = as.numeric(AreaCoverage))%>%
  mutate(pctCoverage = AreaCoverage / NArea * 100)


TreeCanopyLoss1N <- 
  TreeCanopyLossN%>%
  dplyr::select('NAME', 'AreaLoss')%>%
  st_drop_geometry()

TreeCanopyGain1N <- 
  TreeCanopyGainN%>%
  dplyr::select('NAME', 'AreaGain')%>%
  st_drop_geometry()

TreeCanopyCoverageN118 <- 
  TreeCanopyCoverageN18%>%
  dplyr::select('NAME', 'AreaCoverage', 'pctCoverage')%>%
  st_drop_geometry()


FinalNeighborhood <- 
  left_join(TreeCanopyLoss1N, TreeCanopyGain1N)%>%
  left_join(TreeCanopyCoverageN118)%>%
  mutate_all(~replace(., is.na(.), 0))%>%
  mutate(GainMinusLoss = AreaGain - AreaLoss)%>%
  dplyr::select(GainMinusLoss,
                AreaGain, AreaLoss, NAME, pctCoverage, AreaCoverage)

FinalNeighborhood<- 
  Neighborhood%>%
  left_join(FinalNeighborhood)%>%
  mutate(LossOrGain = ifelse(GainMinusLoss > 0, "Gain", "Loss"))%>%
  mutate(PctUnderGoal = 30 - pctCoverage)

#grid.arrange(ncol=2,

FinalNeighborhood$GoalCat <- cut(FinalNeighborhood$PctUnderGoal, 
                      breaks = c(-Inf, 0, 6, 12, 18, 24, 30), 
                       labels = c("Reached Goal", "0-6% Under", "6-12% Under", "12-18% Under", "18-24% Under", "24-30% Under"), 
                       right = FALSE)

FinalNeighborhood$GainMinusLossCat <- cut(FinalNeighborhood$GainMinusLoss, 
                      breaks = c(-Inf, -4000000, -395242, -15647, 0, 500000, Inf), 
                       labels = c("Extreme Loss", "Substantial Loss", "Moderate Loss", "Low Loss", "Gain", "Substantial Gain"),
                       right = FALSE)

The neighborhoods farthest from 30% tree canopy are in South Philadelphia and parts of Center City. These neighborhoods are all along the Delaware River. The neighborhoods which meet this goal are largely in Northwest Philadelphia.

goalProg <- ggmap(base_map) +
  geom_sf(data = ll(FinalNeighborhood), aes(fill = GoalCat), color = "white", inherit.aes = FALSE) +
  labs(title = "30% Canopy Goal by Neighborhood, 2018",
       subtitle = "% Away from 30% tree canopy") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Canopy Status", direction = -1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE))
goalProg 

Most neighborhoods experienced a net loss in tree canopy. Interestingly, most tree gain was experienced in South Philadelphia and Center City, in neighborhoods which have low existing canopy.

netChange <- ggmap(base_map) +
  geom_sf(data = ll(FinalNeighborhood), aes(fill = GainMinusLossCat), inherit.aes = FALSE, color = "white") +
  labs(title = "Tree Canopy Net Change by Neighborhood, 2008-2018") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()+ 
  scale_fill_brewer(palette = "PiYG", name = "Net Change", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE))

netChange

A closer look: neighborhood-level tree canopy change

To better understand tree canopy change patterns in neighborhoods, we analyze more closely a neighborhood that has reached the goal and one that is substantially below the goal. In both, the bulk of the tree canopy did not change between 2008 and 2018. However, when we look closer, we see that tree canopy loss and gain are much more clustered in Upper Roxborough, which meets the goal. Here, it appears that some of the loss is related to tree removal along an entire street or throughout a property, whereas in Richmond, tree canopy change has been more piecemeal and scattered. This suggests land use and streets as potential risk factor variables related to tree canopy loss.

paletteChange <- c("green", "orange", "purple")


#reference map
refmap <- ggmap(base_map) +
  geom_sf(data = ll(Neighborhood), fill = "black", colour = "white", inherit.aes = FALSE) +
  geom_sf(data = ll(RMBound), colour = "white", fill = "green", inherit.aes = FALSE) +
    geom_sf(data = ll(URBound), colour = "white", fill = "green", inherit.aes = FALSE) +
  labs(title = "Richmond and Upper Roxborough Neighborhoods") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()
 
RM <- ggmap(RMbase_map) +
   geom_sf(data = ll(RMBound), fill = "black", inherit.aes = FALSE) +
   geom_sf(data = ll(Richmond), aes(fill = CLASS_NAME), colour = "transparent", inherit.aes = FALSE) +
   scale_fill_manual(values = paletteChange, name = "Canopy Change") +
   labs(title = "Richmond Tree Canopy Change 2008-2018", subtitle = "Philadelphia, PA") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()
 
#Upper Roxborough
UR <- ggmap(URbase_map) +
   geom_sf(data = ll(URBound), fill = "black", inherit.aes = FALSE) +
   geom_sf(data = ll(UpperRoxborough), aes(fill = CLASS_NAME), colour = "transparent", inherit.aes = FALSE) +
   scale_fill_manual(values = paletteChange, name = "Canopy Change") +
   labs(title = "Upper Roxborough Tree Canopy Change 2008-2018", subtitle = "Philadelphia, PA") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

grid.arrange(ncol=2, UR, refmap, RM)

3. How does tree canopy change vary by demographic?

Tree canopy loss, like other urban environmental issues, has deep implications for equity. Low income communities and communities of color are more likely to be in places with less tree canopy. Therefore, we explore demographics variables’ correlation with tree canopy gain, loss, net change, and coverage. Surprisingly, demographic variables have little correlation with existing tree canopy or tree canopy change. This is likely because they are accounted for by neighborhood fixed effects.

library(grid)
correlation.long <-
  st_drop_geometry(FinalFishnet) %>%
     dplyr::select(population, medHHInc, housing_units, Rent, pctBach, pctWhite, pctNoVehicle, netChange) %>%
    gather(Variable, Value, -netChange)


correlation.cor <-
  correlation.long %>%
    group_by(Variable) %>%
    summarize(correlation = cor(Value, netChange, use = "complete.obs"))


correlation.long1 <-
  st_drop_geometry(FinalFishnet) %>%
     dplyr::select(population, medHHInc, housing_units, Rent, pctBach, pctWhite, pctNoVehicle, pctLoss) %>%
    gather(Variable, Value, -pctLoss)

correlation.cor1 <-
  correlation.long1 %>%
    group_by(Variable) %>%
    summarize(correlation = cor(Value, pctLoss, use = "complete.obs"))



correlation.long2 <-
  st_drop_geometry(FinalFishnet) %>%
     dplyr::select(population, medHHInc, housing_units, Rent, pctBach, pctWhite, pctNoVehicle, pctGain) %>%
    gather(Variable, Value, -pctGain)

correlation.cor2 <-
  correlation.long2 %>%
    group_by(Variable) %>%
    summarize(correlation = cor(Value, pctGain, use = "complete.obs"))



correlation.long3 <-
  st_drop_geometry(FinalFishnet) %>%
     dplyr::select(population, medHHInc, housing_units, Rent, pctBach, pctWhite, pctNoVehicle, pctCoverage18) %>%
    gather(Variable, Value, -pctCoverage18)

correlation.cor3 <-
  correlation.long3 %>%
    group_by(Variable) %>%
    summarize(correlation = cor(Value, pctCoverage18, use = "complete.obs"))


#plots
netChange <- ggplot(correlation.long, aes(Value, netChange)) +
 stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
  geom_text(data = correlation.cor, aes(label = paste("r =", round(correlation, 2))),
            x=-Inf, y=Inf, vjust = 1.5, hjust = -.1) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  facet_wrap(~Variable, ncol = 2, scales = "free") +
  labs(title = "Net Tree Canopy Change") +
  plotTheme()
  
  # ggplot(correlation.long1, aes(Value, pctLoss)) +
  # geom_point(colour = "black", alpha = 0.3, size = .5) +
  # scale_y_log10() +
  # geom_text(data = correlation.cor1, aes(label = paste("r =", round(correlation, 2))),
  #           x=-Inf, y=Inf, vjust = 1.5, hjust = -.1) +
  # geom_smooth(method = "lm", se = FALSE, colour = "dark green") +
  # facet_wrap(~Variable, ncol = 2, scales = "free") +
  # labs(title = "Tree Canopy Loss") +
  # plotTheme(), 
  
pctGain <- ggplot(correlation.long2, aes(Value, pctGain)) +
stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
    geom_text(data = correlation.cor2, aes(label = paste("r =", round(correlation, 2))),
            x=-Inf, y=Inf, vjust = 1.5, hjust = -.1) +
  geom_smooth(method = "lm", se = FALSE, colour = "dark green") +
  facet_wrap(~Variable, ncol = 2, scales = "free") +
  labs(title = "Tree Canopy Gain") +
   plotTheme()

  # ggplot(correlation.long3, aes(Value, pctCoverage18)) +
  # geom_point(colour = "black", alpha = 0.3, size = .5) +
  # scale_y_log10() +
  # geom_text(data = correlation.cor3, aes(label = paste("r =", round(correlation, 2))),
  #           x=-Inf, y=Inf, vjust = 1.5, hjust = -.1) +
  # geom_smooth(method = "lm", se = FALSE, colour = "dark green") +
  # facet_wrap(~Variable, ncol = 2, scales = "free") +
  # labs(title = "Current Tree Canopy") + 
  # plotTheme()



grid.arrange(ncol=2, top=textGrob("Neighborhood Attributes and Tree Canopy 2008-2018"),netChange,pctGain) 

4. How do historical disinvestment and segregation relate to tree canopy loss?

Next, we examine redlining boundaries from the Homeowner’s Loan Corporation (HOLC). As trees take decades to grow, the spatial configuration of tree canopy and their change is influenced by historical planning decisions. In 1937, HOLC created “redlining” maps which rated neighborhoods’ desirability in four categories. They rated neighborhoods with residents of color as the least desirable and majority-white neighborhoods as the most desirable. As a result, the low-ranked neighborhoods experienced disinvestment and greater difficulty attracting investment.

HOLC2 <- HOLC %>%
  st_intersection(st_make_valid(Philadelphia), HOLC) %>%  
  st_union() %>%
  st_as_sf()

HOLC2 <- HOLC2 %>%
  st_sym_difference(Philadelphia, HOLC2) %>%
  st_as_sf() %>%
  mutate(holc_grade = "Unclassified") %>%
  dplyr::select(holc_grade) %>%
  as.data.frame %>%
  rename(geometry = x)

HOLC_plot <- full_join(as.data.frame(HOLC), as.data.frame(HOLC2)) %>%
  st_as_sf()


ggmap(base_map) +
   geom_sf(data = ll(Philadelphia), colour = "gray90", fill = "gray90", inherit.aes = FALSE) +
  scale_fill_brewer(palette = "PiYG", name = "HOLC Grade", direction = -1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
   geom_sf(data = ll(HOLC_plot), aes(fill = holc_grade), colour = "white", inherit.aes = FALSE) +
   labs(title = "1937 HOLC Redlining Boundaries", subtitle = "Philadelphia, PA") +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

 holc_net <- st_intersection(fishnet_centroid, HOLC) %>%
   dplyr::select(uniqueID, holc_grade)
   

FinalFishnet <-
   holc_net %>%
   st_drop_geometry() %>%
   left_join(FinalFishnet, .)%>%
   mutate(holc_grade = replace_na(holc_grade, "unclassified"))
  

 holcLoss <- FinalFishnet %>%
  dplyr::select(holc_grade, pctLoss) %>%
  group_by(holc_grade) %>%
  na.omit() %>%
  summarize(mean_loss = mean(pctLoss)) %>%
  ggplot() +
  geom_histogram(aes(y = mean_loss), binwidth = 1, fill = "magenta") +
  labs(title="Loss by HOLC Grade, 2008-2018 (%)",
       subtitle="Philadelphia, PA",
       x="HOLC Rating", 
       y="% Loss")+
  facet_wrap(~holc_grade, nrow = 1)+
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  plotTheme()
  
  
  
  holcCanopy <- FinalFishnet %>%
  dplyr::select(holc_grade, pctCoverage18) %>%
  group_by(holc_grade) %>%
  na.omit() %>%
  summarize(mean_cov = mean(pctCoverage18)) %>%
  ggplot() +
  geom_histogram(aes(y = mean_cov), binwidth = 1, fill = "green") +
  labs(title="Coverage by HOLC Grade, 2018 (%)",
       subtitle="Philadelphia, PA",
       x="HOLC Rating", 
       y="% Tree Canopy")+
  facet_wrap(~holc_grade, nrow = 1)+
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  plotTheme()

The plots below illustrate a correlation between tree canopy loss and HOLC ratings. Fishnet cells in highly rated areas experienced less percentage loss between 2008 and 2018, and they had more tree canopy to begin with.

library(grid)
grid.arrange(holcLoss, holcCanopy, top = "1937 HOLC Rating and Tree Canopy", ncol = 2)

5. What other factors influence tree canopy loss?

Based on our analysis of the spatial distribution of tree canopy loss, we consider a few more variables: land use, redlining, hydrology, construction, and health outcomes for potential features.

Hydrology <- Hydrology %>%
   dplyr::select(geometry) 

HydrologyNet <- st_buffer(Hydrology, 100) %>%
st_intersection(FinalFishnet, Hydrology) %>%
mutate(Area = as.numeric(st_area(.))) %>%
st_drop_geometry() %>%
group_by(uniqueID) %>%
summarise(HydrologyArea = sum(Area)) %>%
left_join(FinalFishnet, .) %>%
mutate_all(funs(replace_na(.,0)))%>%
mutate(HydrologyPct = HydrologyArea /2608225 * 100) 

HydrologyNet$HydrologyAreaCat <- cut(HydrologyNet$HydrologyArea, 
                      breaks = c(-Inf, 0, 382, 85305, 1637508, Inf), 
                       labels = c("None", "Little", "Some", "High", "Most"), 
                       right = FALSE)


FinalFishnet <-
   HydrologyNet %>%
   st_drop_geometry() %>%
   left_join(FinalFishnet, .)
# Parcels make little difference, except if you are looking at total net change (looks like a normal distribution). Im commenting it out, but feel free to play around with the data. 

#Parcels
parcels <-
  parcels%>%
 st_centroid()%>%
  dplyr::select(Shape__Area)


FinalFishnet1 <- st_join(FinalFishnet, parcels)

FinalFishnet<-
FinalFishnet1 %>%
  st_drop_geometry()%>%
  group_by(uniqueID)%>%
  summarise(avgParcelSize = mean(Shape__Area))%>%
  left_join(FinalFishnet, .)%>%
  mutate(LogAvgParcelSize = log10(avgParcelSize), 
         LogPctLoss = log10(pctLoss))
LandUse1 <-
  #st_read("C:/Users/Kyle McCarthy/Documents/Practicum/Data/LandUse/landu.csv")%>%
 # st_read("C:/Users/agarw/Documents/MUSA 801/Data/landu.csv")%>%
   st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/landu.csv") %>%
  #st_read("C:/Users/Prince/Desktop/Tree Canopy Data/landu.csv") %>%
  mutate(AreaGain = as.numeric(AreaGain),
         AreaLoss = as.numeric(AreaLoss),
         Coverage08 = as.numeric(Coverage08),
         Coverage18 = as.numeric(Coverage18))
 LandUse1 <- LandUse1%>% replace(is.na(LandUse1), 0)

 
LandUse<-
  LandUse1%>%
  group_by(Descriptio)%>%
  summarise(AreaGain = sum(AreaGain),
            AreaLoss = sum(AreaLoss),
            Coverage08 = sum(Coverage08),
            Coverage18 = sum(Coverage18))%>%
  mutate(pctLoss = as.numeric(AreaLoss / Coverage08),
         pctGain = as.numeric(AreaGain/ Coverage18),
         pctChange = (Coverage18 - Coverage08)/ (Coverage08) * 100,
         netChange = AreaGain - AreaLoss)


LandUseLong<-
  LandUse%>%
  mutate(AreaLoss = AreaLoss * -1)%>%
  dplyr::select(AreaGain, AreaLoss, Descriptio)%>%
  gather(variable, value, -c(Descriptio))%>%
  mutate(variable = ifelse(variable == "AreaGain", "Area Gain", "Area Loss"))


LandUseLong<-
  LandUse%>%
  mutate(pctLoss = pctLoss * -1)%>%
  dplyr::select(netChange, pctChange, Descriptio)%>%
  gather(variable, value, -c(Descriptio))

Land use

As previously mentioned, land use appears to be correlated with tree canopy. A 2018 report showed that the most trees were removed next to streets and on residential property. This proves true in our analysis: the top two land use types that experienced tree canopy change are residential and transportation.

LandUse2 <-
  LandUse1%>%
  filter(Descriptio == "Residential")%>%
  group_by(C_DIG2DESC)%>%
  summarise(AreaGain = sum(AreaGain),
            AreaLoss = sum(AreaLoss),
            Coverage08 = sum(Coverage08),
            Coverage18 = sum(Coverage18))%>%
  mutate(pctChange = (Coverage18 - Coverage08)/ (Coverage08) * 100,
         netChange = AreaGain - AreaLoss)


LandUse <- ggplot(LandUseLong, aes(fill = variable, y=Descriptio, x=value))+
  geom_bar(stat='identity', position = "stack", width = .75)+
 scale_fill_manual(values = c("light green", "magenta"), name = "Area Gain or Loss")+
  labs(title = "Loss and Gain",
       subtitle = "Philadelphia, 2008-2018")+
  xlab("        Total Area Lost                Total Area Gained")+
  ylab("Land Use Type") +
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  plotTheme()


resDensity <- ggplot(LandUse2, aes(y=C_DIG2DESC, x=pctChange))+
  geom_bar(stat='identity', fill="light green", width = 0.75)+
  labs(title = "Change on Residential Land",
       subtitle = "('18 Canopy - '08 Canopy) / ('18 Canopy) * 100 ")+
  ylab("Residential Land Use Type")+
  xlab("Percent Tree Canopy Change")+
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  plotTheme()
# )

grid.arrange(LandUse, resDensity, ncol = 2, top = "Land Use and Tree Canopy Change")

Health outcomes

We also check for a correlation between tree canopy loss and negative health outcomes. Contrary to our expectations, tree canopy loss has a weak correlation with asthma, leisure time used for physical activity, obesity, stroke, and self-related physical and mental health.

 publichealth <- 
   read.socrata("https://chronicdata.cdc.gov/resource/yjkw-uj5s.json") %>%
   filter(countyname == "Philadelphia") 
 
# Joining by GEOID to get geometry
 phillyhealth <- 
   merge(x = publichealth, y = ACS, by.y = "GEOID", by.x = "tractfips", all.x = TRUE)%>%
   st_as_sf()%>%
   mutate(tractarea = st_area(geometry)) %>%
   mutate(tractarea = as.numeric(tractarea))
 
 treecanopy_census <- 
  TreeCanopy%>%
  st_make_valid() %>%
  st_intersection(phillyhealth)
 
treecanopy_census<- 
  treecanopy_census%>%
  mutate(TreeArea = st_area(treecanopy_census))%>%
  mutate(TreeArea = as.numeric(TreeArea))

 # ggplot() +
 #   geom_sf(data = treecanopy_census, aes(fill = casthma_crudeprev))+
 #   plotTheme()
 # 
 
# Tree Canopy Loss

TreeCanopyLoss_census <- 
  treecanopy_census%>%
  filter(CLASS_NAME == "Loss")%>%
  group_by(tractfips,tractarea)%>%
  summarise(AreaLoss = sum(TreeArea))%>%
  mutate(pctLoss = AreaLoss/tractarea) %>%
  st_drop_geometry()

TreeCanopyGain_census <- 
  treecanopy_census%>%
  filter(CLASS_NAME == "Gain")%>%
  group_by(tractfips,tractarea)%>%
  summarise(AreaLoss = sum(TreeArea))%>%
  mutate(pctGain = AreaLoss/tractarea) %>%
  st_drop_geometry()

TreeCanopyNoC_census <- 
  treecanopy_census%>%
  filter(CLASS_NAME == "No Change")%>%
  group_by(tractfips,tractarea)%>%
  summarise(AreaLoss = sum(TreeArea))%>%
  mutate(pctNoC = AreaLoss/tractarea) %>%
  st_drop_geometry()

phillyhealth <-
  merge(x = TreeCanopyLoss_census, y = phillyhealth, by = "tractfips", all = TRUE)

phillyhealth <-
  merge(x = TreeCanopyGain_census, y = phillyhealth, by = "tractfips", all = TRUE)
  
phillyhealth <-
  merge(x = TreeCanopyNoC_census, y = phillyhealth, by = "tractfips", all = TRUE) 
phillyhealth <- phillyhealth %>%
  dplyr::select(tractfips,casthma_crudeprev,AreaLoss.x,pctGain,pctLoss,geometry,stroke_crudeprev,phlth_crudeprev, obesity_crudeprev,mhlth_crudeprev,lpa_crudeprev)%>%
  mutate(LogPctLoss = log10(pctLoss),
         LogPctGain = log10(pctGain)) %>%
  rename(Asthma = casthma_crudeprev,
         Leisure_time = lpa_crudeprev,
         Mental_health = mhlth_crudeprev,
         Obesity = obesity_crudeprev,
         Physical_health = phlth_crudeprev,
         Stroke = stroke_crudeprev) 


correlation.long4 <-
  phillyhealth%>%
     dplyr::select(Asthma, Physical_health,Stroke,
                Obesity,Mental_health,Leisure_time, pctLoss) %>%
    gather(Variable, Value, -pctLoss) %>%
  mutate(Value = as.numeric(Value))

correlation.cor4 <-
  correlation.long4 %>%
    group_by(Variable) %>%
    summarize(correlation = cor(Value, pctLoss, use = "complete.obs"))

ggplot(correlation.long4, aes(Value, pctLoss)) +
  stat_density2d(aes(fill = ..level..), geom = "polygon", bins = 20) +
  scale_fill_gradient(low="light green", high="magenta", name="Distribution") +
    scale_x_log10() + scale_y_log10() +
  geom_text(data = correlation.cor4, aes(label = paste("r =", round(correlation, 2))),
            x=-Inf, y=Inf, vjust = 1.5, hjust = -.1) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  facet_wrap(~Variable, ncol = 2, scales = "free") +
  labs(title = "Tree Canopy Loss and Census Tract Health Outcomes") +
  plotTheme()

e311Trees <-
 # st_read('C:/Users/Kyle McCarthy/Documents/Practicum/e311.csv') 
   st_read("/Users/annaduan/Desktop/Y3S2/Practicum/Data/streettree311.csv")
  #st_read('C:/Users/agarw/Documents/MUSA 801/Data/e311.csv')
#st_read("C:/Users/Prince/Desktop/Tree Canopy Data/e311.csv") %>%


e311Trees<-
  e311Trees%>%
  dplyr:: select(service_name, lat, lon)%>%
  mutate(lon = as.numeric(lon),
         lat = as.numeric(lat))%>%
    na.omit()%>%
  st_as_sf(.,coords=c("lon","lat"),crs=4326)%>%
  st_transform('ESRI:102728')


FinalFishnet <-
  e311Trees%>%
  mutate(e311Count = 1) %>%
  dplyr::select(e311Count)%>%
  st_join(FinalFishnet)%>% 
  st_drop_geometry()%>% 
  group_by(uniqueID)%>% 
  summarise(e311Count = sum(e311Count))%>% 
  mutate(e311Count = replace_na(e311Count, 0))%>%
  left_join(FinalFishnet, .)%>%
  mutate(Loge311Count = log10(e311Count),
         LogPctLoss = log10(pctLoss))

#CONSTRUCTION

const_spatial2 <-
  const_spatial %>%
  filter(permitdescription == 'RESIDENTIAL BUILDING PERMIT ' | permitdescription == 'COMMERCIAL BUILDING PERMIT')

const_net <- 
   dplyr::select(const_spatial) %>% 
   mutate(countConst = 1) %>% 
   aggregate(., Neighborhood, sum) %>%
   mutate(countConst = replace_na(countConst, 0),
          neigh = Neighborhood$NAME)

const_net_fish <- 
   dplyr::select(const_spatial) %>% 
   mutate(countConst = 1) %>% 
   aggregate(., fishnet, sum) %>%
   mutate(countConst = replace_na(countConst, 0),
          uniqueID = rownames(.))
          
const_net$countConstCat <- cut(const_net$countConst, 
                      breaks = c(-Inf,  1302,  3002, 5400, Inf), 
                       labels = c("Least permits", "Some permits", "Moderate permits", "Most permits"), 
                       right = FALSE)

# constructionPlot <- ggplot() +
#    geom_sf(data = const_net, aes(fill = as.factor(countConstCat)), color = NA) + scale_fill_brewer(palette = "Greys", direction = 1, aesthetics = c("fill"), guide = guide_legend(reverse = TRUE), name = "Permit Count") +
#    #scale_fill_viridis() +
#    labs(title = "Construction Permits by Neighborhood, 2020", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()

FinalFishnet <-
   const_net_fish %>%
   st_drop_geometry() %>%
   left_join(FinalFishnet, .)%>%
   mutate(countConst = replace_na(countConst, 0))





#Filter maximum construction permits by neighborhood

const_max_neigh <-
  const_net %>%
  filter(countConst == max(countConst))

Max_neigh <- Neighborhood %>%
  filter(NAME == const_max_neigh$neigh)

const_neigh <- st_intersection(const_spatial, Max_neigh)


Max_neigh_tree <- st_intersection(st_make_valid(TreeCanopy), Max_neigh) %>%
  filter(CLASS_NAME == 'Loss')

base_mapGH <- get_map(location =  unname(st_bbox(ll(st_buffer(st_centroid(Max_neigh_tree),1100)))),
                    maptype = "satellite") #get a basemap

pbNeigh <- ggmap(base_mapGH, darken = c(0.3, "white")) + 
  geom_sf(data = ll(Max_neigh), fill = "black", alpha = 0.8, inherit.aes = FALSE) +
  geom_sf(data = ll(Max_neigh_tree), aes(colour = CLASS_NAME), colour = "red", inherit.aes = FALSE) +
  geom_sf(data = ll(const_neigh), size = 0.1, inherit.aes = FALSE) +
  scale_fill_manual(values = paletteJ, name = "Canopy Change") +
  labs(title = "Point Breeze", subtitle = "Philadelphia, PA") +
    mapTheme()



#Richmond
 
 const_RM <- st_intersection(const_spatial, RMBound)

Richmond <- Richmond %>%
  filter(CLASS_NAME == 'Loss')
 
 base_mapRM <- get_map(location =  unname(st_bbox(ll(st_buffer(st_centroid(Richmond),1100)))),
                     maptype = "satellite") #get a basemap
 
rmNeigh<- ggmap(base_mapRM, darken = c(0.3, "white")) + 
   geom_sf(data = ll(RMBound), fill = "black", alpha = 0.8, inherit.aes = FALSE) +
   geom_sf(data = ll(Richmond), aes(colour = CLASS_NAME), colour = "red", inherit.aes = FALSE) +
   geom_sf(data = ll(const_RM), size = 0.1, inherit.aes = FALSE) +
   scale_fill_manual(values = paletteJ, name = "Canopy Change") +
   labs(title = "Richmond", subtitle = "Philadelphia, PA") +
     mapTheme()
 
#Upper Roxborough
 
 const_UR <- st_intersection(const_spatial, URBound)
 
 UpperRoxborough <- UpperRoxborough %>%
   filter(CLASS_NAME == 'Loss')
 
 base_mapUR <- get_map(location =  unname(st_bbox(ll(st_buffer(st_centroid(UpperRoxborough),1100)))),
                     maptype = "satellite") #get a basemap
 
urNeigh <- ggmap(base_mapUR, darken = c(0.3, "white")) + 
   geom_sf(data = ll(URBound), fill = "black", alpha = 0.8, inherit.aes = FALSE) +
   geom_sf(data = ll(UpperRoxborough), aes(colour = CLASS_NAME), colour = "red", inherit.aes = FALSE) +
   geom_sf(data = ll(const_UR), size = 0.1, inherit.aes = FALSE) +
   scale_fill_manual(values = paletteJ, name = "Canopy Change") +
   labs(title = "Upper Roxborough", subtitle = "Philadelphia, PA") +
     mapTheme()
LandUseFishnet_ResTrans<- 
LandUseFishnet%>% 
filter(Descriptio == "Residential" | Descriptio == "Transportation")%>% 
dplyr::select(uniqueID, Descriptio)%>% 
mutate(Area = as.numeric(st_area(.)))%>% 
st_drop_geometry()%>%
group_by(uniqueID)%>% 
summarise(Area = sum(Area))%>%
mutate(pctTransRes = Area / 2608225)%>% 
dplyr::select(pctTransRes, uniqueID)

LandUseFishnet_Trans<- 
LandUseFishnet%>% 
filter(Descriptio == "Transportation")%>% 
dplyr::select(uniqueID, Descriptio)%>% 
mutate(Area = as.numeric(st_area(.)))%>% 
st_drop_geometry()%>%
group_by(uniqueID)%>% 
summarise(Area = sum(Area))%>%
mutate(pctTrans = Area / 2608225)%>% 
dplyr::select(pctTrans, uniqueID)

LandUseFishnet_Res<- 
LandUseFishnet%>% 
filter(Descriptio == "Residential")%>% 
dplyr::select(uniqueID, Descriptio)%>% 
mutate(Area = as.numeric(st_area(.)))%>% 
st_drop_geometry()%>%
group_by(uniqueID)%>% 
summarise(Area = sum(Area))%>%
mutate(pctRes = Area / 2608225)%>% 
dplyr::select(pctRes, uniqueID)

FinalFishnet<- 
left_join(LandUseFishnet_ResTrans, LandUseFishnet_Res)%>% 
left_join(., LandUseFishnet_Trans)%>% 
mutate_all(~replace(., is.na(.), 0))%>%
left_join(FinalFishnet, .)%>% 
mutate(LogPctRes = log10(pctRes), 
LogPctResTrans = log10(pctTransRes))
function(measureFrom,measureTo,k) {
  measureFrom_Matrix <-
    as.matrix(measureFrom)
  measureTo_Matrix <-
    as.matrix(measureTo)
  nn <-   
    get.knnx(measureTo, measureFrom, k)$nn.dist
  output <-
    as.data.frame(nn) %>%
    rownames_to_column(var = "thisPoint") %>%
    gather(points, point_distance, V1:ncol(.)) %>%
    arrange(as.numeric(thisPoint)) %>%
    group_by(thisPoint) %>%
    summarize(pointDistance = mean(point_distance)) %>%
    arrange(as.numeric(thisPoint)) %>% 
    dplyr::select(-thisPoint) %>%
    pull()
  
  return(output)  
}

st_c <- st_coordinates

# Construction nearest neighbor
constfilter <- 
  const%>% filter(permitdescription == "NEW CONSTRUCTION PERMIT")
  

TreeCanopyCentroids <- 
  TreeCanopy%>% 
  mutate(Construction_nn1 = nn_function(st_c(st_centroid(.)), st_c(const), 1)) %>%
  dplyr::select(Construction_nn1, geometry, CLASS_NAME)


TreeCanopyCentroids1<- 
  TreeCanopyCentroids%>%
  st_drop_geometry()%>%
  group_by(CLASS_NAME)%>%
  summarise(avg_nnConst = mean(Construction_nn1))

# ggplot(TreeCanopyCentroids1, aes(y=avg_nnConst, x=CLASS_NAME))+
#   geom_bar(stat='identity', fill="forest green", width = 0.4)+
#   labs(title = "Construction Nearest Neighbor and Canopy Change")+
#   ylab("Distance to nearest construction (ft)")+
#   xlab("Tree Canopy Change")+
#   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

FinalFishnet <- 
  FinalFishnet%>% 
  st_transform('ESRI:102728')
  

FinalFishnet2 <- 
  st_join(FinalFishnet, TreeCanopyCentroids)%>%
  st_drop_geometry()%>%
  group_by(uniqueID)%>%
  summarise(avg_nnConst = mean(Construction_nn1))

FinalFishnet2 <- FinalFishnet2%>% 
  left_join(FinalFishnet, .)

FinalFishnet <- FinalFishnet2
#

# ggplot(FinalFishnet, aes(y = LogPctLoss , x = avg_nnConst))+
#   geom_point()+ 
#   labs(title = "Canopy Loss and Construction", 
#        subtilte = "Aggregated by Fishnet") + 
#   xlab("Construction distance") + 
#   ylab("Log Canopy Loss") + 
#   theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"), plot.subtitle = element_text(hjust = 0.5, size = 8)) +
#   plotTheme()
poles <- st_read('http://data.phl.opendata.arcgis.com/datasets/9059a7546b6a4d658bef9ce9c84e4b03_0.geojson') %>% 
st_transform('ESRI:102728')%>% 
st_centroid()

poles_nn1 <- 
  TreeCanopy%>% 
  mutate(Poles_nn1 = nn_function(st_c(st_centroid(.)), st_c(poles), 10))
  
TreeCanopyCentroids <- st_join(FinalFishnet, poles_nn1)

TreeCanopyCentroids1<- 
  TreeCanopyCentroids%>% 
  st_drop_geometry()%>%
  group_by(uniqueID)%>%
  summarise(avg_nnPole = mean(Poles_nn1))

FinalFishnet<-
  TreeCanopyCentroids1%>% 
  left_join(FinalFishnet, .)%>% 
  mutate(LogPole = log10(avg_nnPole), 
  LogPctGain = log10(pctGain))

Finally, we visualize the spatial density of additional selected risk factors below.

vars_net <- FinalFishnet %>%
  dplyr::select(pctCoverage08, HydrologyArea, avgParcelSize, e311Count, countConst, pctTransRes, pctRes, pctTrans)

#make vars_net.long to use for mapping
vars_net.long <-
   gather(vars_net, Variable, value, -geometry)

#make list of unique variables
vars <- unique(vars_net.long$Variable)
mapList <- list()

#map risk factors
for(i in vars){
  mapList[[i]] <- 
    ggplot() +
      geom_sf(data = filter(vars_net.long, Variable == i), aes(fill=value), colour=NA) +
      scale_fill_viridis(option = "A", name="") +
      labs(title=i) +
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()
  }

do.call(grid.arrange,c(mapList, ncol = 3, top = "Risk Factors by Fishnet"))

# #########
# polePlot <-
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "Distance", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
# geom_sf(data = ll(FinalFishnet), aes(fill = q5(avg_nnPole)), colour = "transparent", inherit.aes = FALSE) +
#   labs(title = "Poles (Nearest Neighbor)", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# 
# parcelPlot <- 
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "Area (ft^2)", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
#   geom_sf(data = ll(FinalFishnet), aes(fill = q5(avgParcelSize)), colour = "transparent", inherit.aes = FALSE, alpha = 0.75) +
#   labs(title = "Average Parcel Size", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# hydroPlot <- 
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "Area (ft^2)", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
#   geom_sf(data = ll(HydrologyNet), aes(fill = HydrologyAreaCat), colour = "transparent", inherit.aes = FALSE, alpha = 0.75) +
#   labs(title = "Proximity to water", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# constructPlot <-
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "Count", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
# geom_sf(data = ll(FinalFishnet), aes(fill = q5(countConst)), colour = "transparent", inherit.aes = FALSE) +
#   labs(title = "Construction Permits", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# transPlot <-
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "%", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
# geom_sf(data = ll(FinalFishnet), aes(fill = q5(pctTrans)), colour = "transparent", inherit.aes = FALSE) +
#   labs(title = "Transportation Land Use Parcels", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# resPlot <-
#   ggmap(base_map) +
#   scale_fill_brewer(palette = "PiYG", name = "%", direction = 1, aesthetics = c("colour", "fill"), guide = guide_legend(reverse = TRUE)) +
# geom_sf(data = ll(FinalFishnet), aes(fill = q5(pctRes)), colour = "transparent", inherit.aes = FALSE) +
#   labs(title = "Residential Land Use Parcels", subtitle = "Philadelphia, PA") +
#   theme(plot.title = element_text(size = 30, face = "bold"), 
#         legend.title = element_text(size = 12)) +  mapTheme()
# 
# grid.arrange(hydroPlot, parcelPlot, polePlot, transPlot, resPlot, constructPlot, ncol = 2, top = "Canopy Loss Risk Factors by Fishnet")
grid.arrange(pbNeigh, urNeigh, rmNeigh, ncol = 1, top = "Construction and Canopy Loss in 3 Neighborhoods")
library(spdep)
library(FNN)
library(spatstat)
#assign weights at the grid cell level
final_net.nb <- poly2nb(as_Spatial(FinalFishnet), queen=TRUE)
final_net.weights <- nb2listw(final_net.nb, style="W", zero.policy=TRUE)

FinalFishnet <- FinalFishnet %>%
  na.exclude()

#add Moran's I and P value to final_net
final_net.localMorans <- 
  cbind(
    as.data.frame(localmoran(FinalFishnet$pctLoss, final_net.weights)),
    as.data.frame(FinalFishnet)) %>% 
    st_sf() %>%
      dplyr::select(Percent_Loss = pctLoss, 
                    Local_Morans_I = Ii, 
                    Morans_P_Value = `Pr(z > 0)`) %>%
      mutate(Sig_Hotspots = ifelse(Morans_P_Value <= 0.0000001, 1, 0)) %>% #change P value
      gather(Variable, Value, -geometry)

#get unique values for mapping
vars <- unique(final_net.localMorans$Variable)
varList <- list()

#map moran's I and P Value
for(i in vars){
  varList[[i]] <- 
    ggplot() +
      geom_sf(data = filter(final_net.localMorans, Variable == i), 
              aes(fill = Value), colour=NA) +
      scale_fill_viridis(name="", option = "B") +
      labs(title=i) +
    theme(plot.title = element_text(size = 30, face = "bold"), 
          legend.title = element_text(size = 12)) +  mapTheme()}

do.call(grid.arrange,c(varList, ncol = 4, top = "Figure 5: Local Moran's I Statistics, Observed Arrests"))

Random Forest modelling

Pairwise correlations

The following plot visualizes correlations between numeric risk factors and tree canopy loss.

corrplotvars <- FinalFishnet %>%
   dplyr::select(ConservationPct, pctLoss, pctGain, pctCoverage08, LogPctLoss, LogPctGain, population, medHHInc, housing_units, Rent, pctBach, pctWhite, pctNoVehicle, HydrologyArea, LogAvgParcelSize, e311Count, countConst, pctRes, pctTrans) %>%
  rename(transportation_landuse = pctTrans,
         residential_landuse = pctRes,
         construction_permits = countConst,
         service_requests311 = e311Count,
         parcel_size = LogAvgParcelSize,
         hydrology = HydrologyArea,
         no_vehicle_household = pctNoVehicle,
         white_residents = pctWhite,
         bachelors_degrees = pctBach,
         rent = Rent,
         median_income = medHHInc,
         canopy_gain = LogPctGain,
         canopy_loss = LogPctLoss,
         tree_canopy = pctCoverage08,
         percent_gain = pctGain,
         percent_loss = pctLoss)

numericVars <-
  select_if(st_drop_geometry(corrplotvars), is.numeric) %>% na.omit()

ggcorrplot(
  round(cor(numericVars), 1),
  p.mat = cor_pmat(numericVars),
  colors = c("pink", "purple","blue"),
  type="upper",
  insig = "blank", 
  outline.color = "white") +  
    labs(title = "Numeric Variables Correlation Plot") +
    theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +
  plotTheme()

Based on this plot, we select the following variables to test in our model:

  • Percent tree canopy coverage 2018
  • Parcel size (Log average)
  • Construction distance (Average nearest neighbor)
  • Poles (Log)
  • Neighborhood name
  • Construction permits (Count)
  • 311 Requests (Log)
  • Percent residential land use (Log)
  • Percent transportation land use (Log)
  • HOLC redlining grade
  • Hydrology (%)
neigh_net <- st_intersection(fishnet_centroid, Neighborhood) %>%
   dplyr::select(uniqueID, NAME)
   

FinalFishnet <-
   neigh_net %>%
   st_drop_geometry() %>%
   left_join(FinalFishnet, .)%>%
   mutate(NAME = replace_na(NAME, "unclassified"))

FinalFishnet5 <-
  FinalFishnet %>%
  mutate(LogPctLoss = log(pctLoss),
         LogPctGain = log(pctGain),
         LogPctChange = log(pctChange),
         LogPctTrans = log(pctTrans))

# test <- FinalFishnet %>%
#   select(holc_grade)

FinalFishnet5$avg_nnConst <- ifelse(is.na(FinalFishnet5$avg_nnConst),0, FinalFishnet5$avg_nnConst)
FinalFishnet5$avg_nnPole <- ifelse(is.na(FinalFishnet5$avg_nnPole),0, FinalFishnet5$avg_nnPole)
FinalFishnet5$pctWhite <- ifelse(is.na(FinalFishnet5$pctWhite),0, FinalFishnet5$pctWhite)
FinalFishnet5$pctBach <- ifelse(is.na(FinalFishnet5$pctBach),0, FinalFishnet5$pctBach)
FinalFishnet5$population <- ifelse(is.na(FinalFishnet5$population),0, FinalFishnet5$population)
FinalFishnet5$italian <- ifelse(is.na(FinalFishnet5$italian),0, FinalFishnet5$italian)
FinalFishnet5$medHHInc <- ifelse(is.na(FinalFishnet5$medHHInc),0, FinalFishnet5$medHHInc)
FinalFishnet5$e311Count <- ifelse(is.na(FinalFishnet5$e311Count),0, FinalFishnet5$e311Count)
FinalFishnet5$pctTransRes <- ifelse(is.na(FinalFishnet5$pctTransRes),0, FinalFishnet5$pctTransRes)
FinalFishnet5$pctRes <- ifelse(is.na(FinalFishnet5$pctRes),0, FinalFishnet5$pctRes)
FinalFishnet5$pctTrans <- ifelse(is.na(FinalFishnet5$pctTrans),0, FinalFishnet5$pctTrans)
FinalFishnet5$LogAvgParcelSize <- ifelse(is.na(FinalFishnet5$LogAvgParcelSize),0, FinalFishnet5$LogAvgParcelSize)
FinalFishnet5$LogPctChange <- ifelse(is.na(FinalFishnet5$LogPctChange),0, FinalFishnet5$LogPctChange)
FinalFishnet5$Loge311Count <- ifelse(is.na(FinalFishnet5$Loge311Count),0, FinalFishnet5$Loge311Count)
FinalFishnet5$LogPctRes <- ifelse(is.na(FinalFishnet5$LogPctRes),0, FinalFishnet5$LogPctRes)
FinalFishnet5$LogPctRes <- ifelse(is.infinite(FinalFishnet5$LogPctRes),0, FinalFishnet5$LogPctRes)
FinalFishnet5$LogPctTrans <- ifelse(is.na(FinalFishnet5$LogPctTrans),0, FinalFishnet5$LogPctTrans)
FinalFishnet5$LogPctTrans <- ifelse(is.infinite(FinalFishnet5$LogPctTrans),0, FinalFishnet5$LogPctTrans)
FinalFishnet5$LogPctResTrans <- ifelse(is.infinite(FinalFishnet5$LogPctResTrans),0, FinalFishnet5$LogPctResTrans)
FinalFishnet5$LogPctResTrans <- ifelse(is.na(FinalFishnet5$LogPctResTrans),0, FinalFishnet5$LogPctResTrans)
FinalFishnet5$LogPctLoss <- ifelse(is.infinite(FinalFishnet5$LogPctLoss),0, FinalFishnet5$LogPctLoss)
FinalFishnet5$LogPctGain <- ifelse(is.infinite(FinalFishnet5$LogPctGain),0, FinalFishnet5$LogPctGain)
FinalFishnet5$LogPctChange <- ifelse(is.infinite(FinalFishnet5$LogPctChange),0, FinalFishnet5$LogPctChange)
FinalFishnet5$countConst <- ifelse(is.na(FinalFishnet5$countConst),0, FinalFishnet5$countConst)
FinalFishnet5$Variable <- ifelse((FinalFishnet5$netChange > 0), 1, 0)
FinalFishnet5$HydrologyPct <- ifelse(is.na(FinalFishnet5$HydrologyPct),0, FinalFishnet5$HydrologyPct)
FinalFishnet5$pctLoss <- ifelse(is.na(FinalFishnet5$pctLoss),0, FinalFishnet5$pctLoss)
FinalFishnet5$LogPole <- ifelse(is.na(FinalFishnet5$LogPole),0, FinalFishnet5$LogPole)
FinalFishnet5$LogPole <- ifelse(is.infinite(FinalFishnet5$LogPole),0, FinalFishnet5$LogPole)
  
FinalFishnet5 <-
  FinalFishnet5 %>%
  dplyr::select(uniqueID, pctCoverage18, LogAvgParcelSize,Loge311Count, LogPctRes, avg_nnConst,avg_nnPole, pctLoss, holc_grade, NAME,
                                    countConst, pctWhite, pctBach, e311Count, Loge311Count, pctTransRes, pctRes, HydrologyPct,
                                    pctTrans, LogPctResTrans, population, italian, medHHInc, Variable, pctCoverage08Cat, LogPctTrans, LogPole)

# hist(FinalFishnet5$pctLoss,xlab="Percent Loss",
#      breaks=50, col="purple", main = "Histogram of Percent Loss ")

FinalFishnet5$Variable <- ifelse(FinalFishnet5$pctLoss > 25,1, 0)


# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctCoverage18 = ifelse(pctCoverage18 > 25, 1, 0))
# 
# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctWhite = ifelse(pctWhite > 66.67, 1, 0))
# 
# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctBach = ifelse(pctBach > 66.67, 1, 0))
# 
# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctRes = ifelse(pctRes > 66.67, 1, 0))
# 
# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctTransRes = ifelse(pctTransRes > 50, 1, 0))
# 
# FinalFishnet5 <-
#   FinalFishnet5%>%
#   mutate(pctTrans = ifelse(pctTrans > 66.67, 1, 0))


# ggplot() +
#   geom_sf(data = FinalFishnet, aes(fill = holc_grade)) +
#   mapTheme()


# RANDOM FOREST MODEL
set.seed(1214)
trainIndex <- createDataPartition(
                                  y = FinalFishnet5$holc_grade,
                                  p = 0.60, 
                                  list = FALSE)

finalTrain <- FinalFishnet5[ trainIndex,] %>% st_drop_geometry()
finalTest  <- FinalFishnet5[-trainIndex,] %>% st_drop_geometry()
finalTest2  <- FinalFishnet5[-trainIndex,] 

# MODEL
# finalModel <- glm(finalTrain$Variable ~ .,
#                     data=finalTrain %>% 
#                      dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst,avg_nnPole,
#                                     countConst, pctWhite, pctBach, Loge311Count, pctRes,
#                                     pctTrans,  italian, medHHInc, LogPctRes, holc_grade, HydrologyPct),#, LogPctResTrans, population, e311Count, pctTransRes
#                     family="binomial" (link="logit"))
# 
# summary(finalModel)
# 
# ## Adding Coefficients
# x <- finalModel$coefficients
# 
# ### Fit metrics
# #pR2(finalModel)
# 
# ## Prediction
# testProbs <- data.frame(Outcome = as.factor(finalTest$Variable),
#                         Probs = predict(finalModel, finalTest, type= "response"))
# 
# # ROC Curve
# ## This us a goodness of fit measure, 1 would be a perfect fit, .5 is a coin toss
# auc(testProbs$Outcome, testProbs$Probs)
# 
# ggplot(testProbs, aes(d = as.numeric(testProbs$Outcome), m = Probs)) +
#   geom_roc(n.cuts = 50, labels = FALSE, colour = "#FE9900") +
#   style_roc(theme = theme_grey) +
#   geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'grey') +
#   labs(title = "ROC Curve - Model with Feature Engineering")


rf <- randomForest(
  finalTrain$Variable ~ .,
  data=finalTrain %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst,LogPctTrans, LogPole, NAME,
                                    countConst, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct)
)

#pred2 = data.frame(predict(rf, newdata=finalTest))

#cm = table(finalTest$Variable, pred2)

## Prediction
testProbs2 <- data.frame(Outcome = as.factor(finalTest$Variable),
                        Probs = predict(rf, finalTest, type= "response"))

# ROC Curve
## This us a goodness of fit measure, 1 would be a perfect fit, .5 is a coin toss
auc(testProbs2$Outcome, testProbs2$Probs)
## Area under the curve: 0.8048
ggplot(testProbs2, aes(d = as.numeric(testProbs2$Outcome), m = Probs)) +
  geom_roc(n.cuts = 50, labels = FALSE, colour = "magenta") +
  style_roc(theme = theme_grey) +
  geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'black') +
  labs(title = "ROC Curve - Model with Feature Engineering") +
  plotTheme()

## optimizing threshold

iterateThresholds <- function(data, observedClass, predictedProbs, group) {
#This function takes as its inputs, a data frame with an observed binomial class (1 or 0); a vector of predicted probabilities; and optionally a group indicator like race. It returns accuracy plus counts and rates of confusion matrix outcomes. It's a bit verbose because of the if (missing(group)). I don't know another way to make an optional parameter.
  observedClass <- enquo(observedClass)
  predictedProbs <- enquo(predictedProbs)
  group <- enquo(group)
  x = .01
  all_prediction <- data.frame()
  
  if (missing(group)) {
  
    while (x <= 1) {
    this_prediction <- data.frame()
    
    this_prediction <-
      data %>%
      mutate(predclass = ifelse(!!predictedProbs > x, 1,0)) %>%
      count(predclass, !!observedClass) %>%
      summarize(Count_TN = sum(n[predclass==0 & !!observedClass==0]),
                Count_TP = sum(n[predclass==1 & !!observedClass==1]),
                Count_FN = sum(n[predclass==0 & !!observedClass==1]),
                Count_FP = sum(n[predclass==1 & !!observedClass==0]),
                Rate_TP = Count_TP / (Count_TP + Count_FN),
                Rate_FP = Count_FP / (Count_FP + Count_TN),
                Rate_FN = Count_FN / (Count_FN + Count_TP),
                Rate_TN = Count_TN / (Count_TN + Count_FP),
                Accuracy = (Count_TP + Count_TN) / 
                           (Count_TP + Count_TN + Count_FN + Count_FP)) %>%
      mutate(Threshold = round(x,2))
    
    all_prediction <- rbind(all_prediction,this_prediction)
    x <- x + .01
  }
  return(all_prediction)
  }
  else if (!missing(group)) { 
   while (x <= 1) {
    this_prediction <- data.frame()
    
    this_prediction <-
      data %>%
      mutate(predclass = ifelse(!!predictedProbs > x, 1,0)) %>%
      group_by(!!group) %>%
      count(predclass, !!observedClass) %>%
      summarize(Count_TN = sum(n[predclass==0 & !!observedClass==0]),
                Count_TP = sum(n[predclass==1 & !!observedClass==1]),
                Count_FN = sum(n[predclass==0 & !!observedClass==1]),
                Count_FP = sum(n[predclass==1 & !!observedClass==0]),
                Rate_TP = Count_TP / (Count_TP + Count_FN),
                Rate_FP = Count_FP / (Count_FP + Count_TN),
                Rate_FN = Count_FN / (Count_FN + Count_TP),
                Rate_TN = Count_TN / (Count_TN + Count_FP),
                Accuracy = (Count_TP + Count_TN) / 
                           (Count_TP + Count_TN + Count_FN + Count_FP)) %>%
      mutate(Threshold = round(x,2))
    
    all_prediction <- rbind(all_prediction,this_prediction)
    x <- x + .01
  }
  return(all_prediction)
  }
}

whichThreshold <- 
  iterateThresholds(
     data=testProbs2, observedClass = Outcome, predictedProbs = Probs)

whichThreshold <- 
  whichThreshold %>%
    dplyr::select(starts_with("Rate"), Threshold) %>%
    gather(Variable, Rate, Rate_TN,Rate_TP,Rate_FN,Rate_FP) 

whichThreshold %>%
  ggplot(.,aes(Threshold,Rate,colour = Variable)) +
  geom_point() +
  scale_colour_manual(values = c("magenta", "lavender", "turquoise", "light green")) +    
  labs(title = "Confusion Metric Rates at Different Thresholds",
       y = "Count") +
  plotTheme() +
  guides(colour=guide_legend(title = "Confusion Metric")) 

Results

FinalFishnet5$countConst1 <- FinalFishnet5$countConst * 0.50      #75 less
FinalFishnet5$countConst2 <- FinalFishnet5$countConst * 0.75       #50 less
FinalFishnet5$countConst3 <- FinalFishnet5$countConst * 1.25      #50 more
FinalFishnet5$countConst4 <- FinalFishnet5$countConst * 1.5       #100 more


FinalFishnet5$avg_nnConst1 <- FinalFishnet5$avg_nnConst * 1.5 

FinalFishnet5$avg_nnConst2 <- FinalFishnet5$avg_nnConst * 1.25

FinalFishnet5$avg_nnConst3 <- FinalFishnet5$avg_nnConst * .75

FinalFishnet5$avg_nnConst4 <- FinalFishnet5$avg_nnConst * .5


set.seed(77) 

##SCENARIO 1: 25 less
rf1 <- randomForest(
  FinalFishnet5$Variable ~ .,
  data=st_drop_geometry(FinalFishnet5) %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst1,LogPctTrans, LogPole, NAME,
                                    countConst1, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct))
testProbs_const1 <- data.frame(Outcome = as.factor(FinalFishnet5$Variable),       ## Prediction
                        Probs = predict(rf1, FinalFishnet5, type= "response"))
testProbs_const1 <- testProbs_const1 %>% mutate(Risk_Cat=
                                            case_when(Probs <=.2 ~ "Very Low",
                                                   Probs > 0.2 & Probs < .4 ~ "Low",    
                                                   Probs >= 0.4 & Probs < .6 ~ "Moderate",
                                                   Probs >=.6 & Probs < .80 ~ "High",
                                                   Probs >=.80 ~ "Severe"))
testProbs_const1$Risk_Cat <- factor(testProbs_const1$Risk_Cat, levels = c("Very Low", "Low", "Moderate", "High", "Severe"))
FinalFishnet2$uniqueID <- seq.int(nrow(FinalFishnet5))
testProbs_const1$uniqueID <- seq.int(nrow(FinalFishnet5))
construction1 <- left_join(FinalFishnet2, testProbs_const1)
construction1 <- construction1 %>%
  mutate(Scenario = "const_25less") %>%
  dplyr::select (Outcome,Probs,Risk_Cat,geometry,Scenario)





##SCENARIO 2: 50 less
rf2 <- randomForest(
  FinalFishnet5$Variable ~ .,
  data=st_drop_geometry(FinalFishnet5) %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst2,LogPctTrans, LogPole, NAME,
                                    countConst2, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct))
testProbs_const2 <- data.frame(Outcome = as.factor(FinalFishnet5$Variable),             #prediction   
                        Probs = predict(rf2, FinalFishnet5, type= "response"))
testProbs_const2 <- testProbs_const2 %>% mutate(Risk_Cat=
                                            case_when(Probs <=.2 ~ "Very Low",
                                                   Probs > 0.2 & Probs < .4 ~ "Low",    
                                                   Probs >= 0.4 & Probs < .6 ~ "Moderate",
                                                   Probs >=.6 & Probs < .80 ~ "High",
                                                   Probs >=.8 ~ "Severe"))
testProbs_const2$Risk_Cat <- factor(testProbs_const2$Risk_Cat, levels = c("Very Low", "Low", "Moderate", "High", "Severe"))
FinalFishnet2$uniqueID <- seq.int(nrow(FinalFishnet5))
testProbs_const2$uniqueID <- seq.int(nrow(FinalFishnet5))
construction2 <- left_join(FinalFishnet2, testProbs_const2)
construction2 <- construction2 %>%
  mutate(Scenario = "const_50less") %>%
  dplyr::select (Outcome,Probs,Risk_Cat,geometry,Scenario)





##SCENARIO 3: 50 more
rf3 <- randomForest(
  FinalFishnet5$Variable ~ .,
  data=st_drop_geometry(FinalFishnet5) %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst3,LogPctTrans, LogPole, NAME,
                                    countConst3, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct))
testProbs_const3 <- data.frame(Outcome = as.factor(FinalFishnet5$Variable),      # Prediction
                        Probs = predict(rf3, FinalFishnet5, type= "response"))
testProbs_const3 <- testProbs_const3 %>% mutate(Risk_Cat=
                                            case_when(Probs <=.2 ~ "Very Low",
                                                   Probs > 0.2 & Probs < .4 ~ "Low",    
                                                   Probs >= 0.4 & Probs < .6 ~ "Moderate",
                                                   Probs >=.6 & Probs < .80 ~ "High",
                                                   Probs >=.8 ~ "Severe"))
testProbs_const3$Risk_Cat <- factor(testProbs_const3$Risk_Cat, levels = c("Very Low", "Low", "Moderate", "High", "Severe"))
FinalFishnet2$uniqueID <- seq.int(nrow(FinalFishnet5))
testProbs_const3$uniqueID <- seq.int(nrow(FinalFishnet5))
construction3 <- left_join(FinalFishnet2, testProbs_const3)
construction3 <- construction3 %>%
  mutate(Scenario = "const_50more") %>%
  dplyr::select (Outcome,Probs,Risk_Cat,geometry,Scenario)





##SCENARIO 4: 100 more
rf4 <- randomForest(
  FinalFishnet5$Variable ~ .,
  data=st_drop_geometry(FinalFishnet5) %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst4,LogPctTrans, LogPole, NAME,
                                    countConst4, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct))
testProbs_const4 <- data.frame(Outcome = as.factor(FinalFishnet5$Variable),     #Prediction
                        Probs = predict(rf4, FinalFishnet5, type= "response"))
testProbs_const4 <- testProbs_const4 %>% mutate(Risk_Cat=
                                            case_when(Probs <=.2 ~ "Very Low",
                                                   Probs > 0.2 & Probs < .4 ~ "Low",    
                                                   Probs >= 0.4 & Probs < .6 ~ "Moderate",
                                                   Probs >=.6 & Probs < .80 ~ "High",
                                                   Probs >=.8 ~ "Severe"))
testProbs_const4$Risk_Cat <- factor(testProbs_const4$Risk_Cat, levels = c("Very Low", "Low", "Moderate", "High", "Severe"))
FinalFishnet2$uniqueID <- seq.int(nrow(FinalFishnet5))
testProbs_const4$uniqueID <- seq.int(nrow(FinalFishnet5))
construction4 <- left_join(FinalFishnet2, testProbs_const4)
construction4 <- construction4 %>%
  mutate(Scenario = "const_100more") %>%
  dplyr::select (Outcome,Probs,Risk_Cat,geometry,Scenario)





##SCENARIO 5: original (current construction levels)
rf5 <- randomForest(
  FinalFishnet5$Variable ~ .,
  data=st_drop_geometry(FinalFishnet5) %>% 
                     dplyr::select(pctCoverage18, LogAvgParcelSize, avg_nnConst,LogPctTrans, LogPole, NAME,
                                    countConst, Loge311Count,pctTrans, LogPctRes, holc_grade, HydrologyPct))  
testProbs_const5 <- data.frame(Outcome = as.factor(FinalFishnet5$Variable),        #Prediction
                        Probs = predict(rf5, FinalFishnet5, type= "response"))
testProbs_const5 <- testProbs_const5 %>% mutate(Risk_Cat=
                                            case_when(Probs <=.2 ~ "Very Low",
                                                   Probs > 0.2 & Probs < .4 ~ "Low",    
                                                   Probs >= 0.4 & Probs < .6 ~ "Moderate",
                                                   Probs >=.6 & Probs < .80 ~ "High",
                                                   Probs >=.8 ~ "Severe"))
testProbs_const5$Risk_Cat <- factor(testProbs_const5$Risk_Cat, levels = c("Very Low", "Low", "Moderate", "High", "Severe"))
FinalFishnet2$uniqueID <- seq.int(nrow(FinalFishnet5))
testProbs_const5$uniqueID <- seq.int(nrow(FinalFishnet5))
construction5 <- left_join(FinalFishnet2, testProbs_const5)
construction5 <- construction5 %>%
  mutate(Scenario = "const_original") %>%
  dplyr::select (Outcome,Probs,Risk_Cat,geometry,Scenario)


const_scenarios <- rbind(construction1, construction2, construction3, construction4, construction5)



#SCENARIO 1
less25 <- ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", inherit.aes = FALSE)+
  geom_sf(data=ll(construction1), aes(fill=Risk_Cat), color="transparent", inherit.aes = FALSE)+
  labs(title="25% Lower")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Risk Score", direction = -1, aesthetics = c("colour", "fill"))


less50 <- ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", inherit.aes = FALSE)+
  geom_sf(data=ll(construction2), aes(fill=Risk_Cat), color="transparent", inherit.aes = FALSE)+
  labs(title="50% Lower")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Risk Score", direction = -1, aesthetics = c("colour", "fill"))


more50 <- ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", inherit.aes = FALSE)+
  geom_sf(data= ll(construction3), aes(fill=Risk_Cat), color="transparent", inherit.aes = FALSE)+
  labs(title="50% Higher")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Risk Score", direction = -1, aesthetics = c("colour", "fill"))

more100 <- ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", inherit.aes = FALSE)+
  geom_sf(data= ll(construction4), aes(fill=Risk_Cat), color="transparent", inherit.aes = FALSE)+
  labs(title="100% Higher")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Risk Score", direction = -1, aesthetics = c("colour", "fill"))

original <- ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", colour = "transparent", inherit.aes = FALSE)+
  geom_sf(data= ll(construction5), aes(fill=Risk_Cat), color="transparent", inherit.aes = FALSE)+
  labs(title="Original")+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme() +
  scale_fill_brewer(palette = "PiYG", name = "Risk Score", direction = -1, aesthetics = c("colour", "fill"))


mylegend<-g_legend(original)
grid.arrange(arrangeGrob(less50 + theme(legend.position="none"), less25 + theme(legend.position="none"), original + theme(legend.position="none"), more50 + theme(legend.position="none"), more100 + theme(legend.position="none"), ncol = 2, top = "Canopy Change Under 5 Construction Scenarios", mylegend))

Validation

How accurate is our model?

testProbs2$predClass  = ifelse(testProbs2$Probs > 0.35 , 1,0)

confusion <- caret::confusionMatrix(reference = as.factor(testProbs2$Outcome),
                        data = as.factor(testProbs2$predClass),
                        positive = "1")


 draw_confusion_matrix <- function(cm) {

   layout(matrix(c(1,1,2)))
   par(mar=c(2,2,2,2))
   plot(c(100, 345), c(300, 450), type = "n", xlab="", ylab="", xaxt='n', yaxt='n')
   title('CONFUSION MATRIX', cex.main=2)

   # create the matrix
   rect(150, 430, 240, 370, col='#3F97D0')
   text(195, 435, 'Low Loss', cex=1.2)
   rect(250, 430, 340, 370, col='#F7AD50')
   text(295, 435, 'Moderate - High Loss', cex=1.2)
   text(125, 370, 'Predicted', cex=1.3, srt=90, font=2)
   text(245, 450, 'Actual', cex=1.3, font=2)
   rect(150, 305, 240, 365, col='#F7AD50')
   rect(250, 305, 340, 365, col='#3F97D0')
   text(140, 400, 'Low Loss', cex=1.2, srt=90)
   text(140, 335, 'Moderate - High Loss', cex=1.2, srt=90)

   # add in the cm results
   res <- as.numeric(cm$table)
   text(195, 400, res[1], cex=1.6, font=2, col='white')
   text(195, 335, res[2], cex=1.6, font=2, col='white')
   text(295, 400, res[3], cex=1.6, font=2, col='white')
   text(295, 335, res[4], cex=1.6, font=2, col='white')

   # add in the specifics
   plot(c(100, 0), c(100, 0), type = "n", xlab="", ylab="", main = "DETAILS", xaxt='n', yaxt='n')
   text(10, 85, names(cm$byClass[1]), cex=1.2, font=2)
   text(10, 70, round(as.numeric(cm$byClass[1]), 3), cex=1.2)
   text(30, 85, names(cm$byClass[2]), cex=1.2, font=2)
   text(30, 70, round(as.numeric(cm$byClass[2]), 3), cex=1.2)
   text(50, 85, names(cm$byClass[5]), cex=1.2, font=2)
   text(50, 70, round(as.numeric(cm$byClass[5]), 3), cex=1.2)
   text(70, 85, names(cm$byClass[6]), cex=1.2, font=2)
   text(70, 70, round(as.numeric(cm$byClass[6]), 3), cex=1.2)
   text(90, 85, names(cm$byClass[7]), cex=1.2, font=2)
   text(90, 70, round(as.numeric(cm$byClass[7]), 3), cex=1.2)

   # add in the accuracy information
   text(30, 35, names(cm$overall[1]), cex=1.5, font=2)
   text(30, 20, round(as.numeric(cm$overall[1]), 3), cex=1.4)
   text(70, 35, names(cm$overall[2]), cex=1.5, font=2)
   text(70, 20, round(as.numeric(cm$overall[2]), 3), cex=1.4)
 }

draw_confusion_matrix(confusion)

#
#
finalTest2$uniqueID <- seq.int(nrow(finalTest))

testProbs2$uniqueID <- seq.int(nrow(finalTest))

try <- left_join(finalTest2, testProbs2)

try <-
  try%>%
  mutate(result = "0")%>%
  mutate(result = ifelse(Outcome == 0 & predClass == 0, "True Negative", result))%>%
  mutate(result = ifelse(Outcome == 1 & predClass == 1, "True Positive", result))%>%
  mutate(result = ifelse(Outcome == 0 & predClass == 1, "False Positive", result))%>%
  mutate(result = ifelse(Outcome == 1 & predClass == 0, "False Negative", result))


palette3 <- c("magenta",  "blue", "purple", "green")

ggmap(base_map) +
  geom_sf(data = ll(fishnet), fill = "white", colour = "transparent", inherit.aes = FALSE)+
  geom_sf(data = ll(try), aes(fill = result), colour = "transparent", inherit.aes = FALSE)+
  labs(title = "Mapped Correlation Matrix")+
  scale_fill_manual(values = palette3, name = "Model Metric",
                  guide = guide_legend(reverse = TRUE))+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5))+
  theme(plot.title = element_text(size = 30, face = "bold"), 
        legend.title = element_text(size = 12)) +  mapTheme()

How generalizable is our model?

## Cross validation
ctrl <- trainControl(method = "cv", number = 10, classProbs=TRUE,savePredictions = TRUE,verboseIter = TRUE)

FinalFishnet5$Variable <- as.factor(FinalFishnet5$Variable)
FinalFishnet5$pctCoverage0 <- as.numeric(FinalFishnet5$pctCoverage18)

FinalFishnet5$Variable2 <- ifelse(FinalFishnet5$Variable == 0, "A", "B")
FinalFishnet5$Variable2 <- as.factor(FinalFishnet5$Variable2)

# FinalFishnet5 %>%
#   mutate(Variable = factor(Variable,
#                         labels = make.names(levels(Variable))))
myGrid <- expand.grid(mtry = c(1, 2, 3, 5, 10),
                      splitrule = c("gini", "extratrees"),
                      min.node.size = 1) ## Minimal node size; default 1 for classification

reg.cv <-train(Variable2 ~ ., data = st_drop_geometry(FinalFishnet5) %>%
                                dplyr::select(Variable2, pctCoverage18, LogAvgParcelSize, avg_nnConst,avg_nnPole, NAME,
                                    countConst, Loge311Count, pctTrans, LogPctRes, holc_grade, HydrologyPct),
              method="ranger",
              tuneGrid = myGrid,
              trControl = ctrl)#, metrics = 'ROC', family="binomial")

reg.cv

reg.cv1 <-train(Variable2 ~ ., data = st_drop_geometry(FinalFishnet5) %>%
                                dplyr::select(Variable2, pctCoverage18, LogAvgParcelSize, avg_nnConst,avg_nnPole, NAME,
                                    countConst, Loge311Count, pctTrans, LogPctRes, holc_grade, HydrologyPct),
              method="rf",
              trControl = ctrl)#, metrics = 'ROC', family="binomial")

reg.cv1

x <- evalm(reg.cv1)

# x <- as.data.frame(x)
# 
# ## get roc curve plotted in ggplot2
# 
# x$roc
# 
# ## get AUC and other metrics

x$stdres

#Goodness of metric
dplyr::select(reg.cv$resample, -Resample) %>%
  gather(metric, value) %>%
  left_join(gather(reg.cv$results[4:5], metric, mean)) %>%
  group_by(metric) %>%
  mutate(mean1 = mean(value)) %>%
  ggplot(aes(value)) +
  geom_histogram(bins = 70, fill = "#FF006A") +
  facet_wrap(~metric) +
  geom_vline(aes(xintercept = mean1), colour = "#981FAC", linetype = 3, size = 1.5) +
  scale_x_continuous(limits = c(0, 1)) +
  labs(x="Goodness of Fit", y="Count", title="CV Goodness of Fit Metrics",
       subtitle = "Across-fold mean represented as dotted lines") +
  plotTheme()

How well does our model predict across space?

To further test our model’s generalizability, we conducted a spatial cross validation. By leaving out one neighborhood at a time, we were able to see how well our model predicts across space.

# Model Validation
crossValidate <- function(dataset, id, dependentVariable, indVariables) {
  
  allPredictions <- data.frame()
  cvID_list <- unique(dataset[[id]])
  
  for (i in cvID_list) {
    
    thisFold <- i
    cat("This hold out fold is", thisFold, "\n")
    
    fold.train <- filter(dataset, dataset[[id]] != thisFold) %>% as.data.frame() %>% 
      dplyr::select(id, geometry, indVariables, dependentVariable)
    fold.test  <- filter(dataset, dataset[[id]] == thisFold) %>% as.data.frame() %>% 
      dplyr::select(id, geometry, indVariables, dependentVariable)
    
    regression <-
      glm(Variable2 ~ ., family = "binomial", 
          data = fold.train %>% 
            dplyr::select(-geometry, -id))
    
    thisPrediction <- 
      mutate(fold.test, Prediction = predict(regression, fold.test, type = "response"))
    
    allPredictions <-
      rbind(allPredictions, thisPrediction)
    
  }
  return(st_sf(allPredictions))
}



reg.vars <- c("Variable2", "pctCoverage18", "LogAvgParcelSize", "avg_nnConst","avg_nnPole", "NAME",
              "countConst", "Loge311Count", "pctTrans", "LogPctRes", "holc_grade", "HydrologyPct")


reg.spatialCV <- crossValidate(
  dataset = FinalFishnet5,
  id = "NAME",
  dependentVariable = "Variable2",
  indVariables = reg.vars) 

reg.spatialcv <-
  reg.spatialCV %>%
  dplyr::select(cvID = NAME, Variable2, Prediction, geometry)

ggplot() +
  geom_sf(data = reg.spatialcv, aes(fill = Prediction), color = "transparent")+
  geom_sf(data = FinalFishnet5, fill = "transparent", color = "white", size=.5)+
  labs(title="Predicted Probabilities")+
  mapTheme()

Conclusion